File: | src/usr.sbin/ldapd/btree.c |
Warning: | line 1027, column 19 Division by zero |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* $OpenBSD: btree.c,v 1.38 2017/05/26 21:23:14 sthen Exp $ */ | |||
2 | ||||
3 | /* | |||
4 | * Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se> | |||
5 | * | |||
6 | * Permission to use, copy, modify, and distribute this software for any | |||
7 | * purpose with or without fee is hereby granted, provided that the above | |||
8 | * copyright notice and this permission notice appear in all copies. | |||
9 | * | |||
10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |||
11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |||
12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |||
13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |||
14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |||
15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |||
16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |||
17 | */ | |||
18 | ||||
19 | #include <sys/types.h> | |||
20 | #include <sys/tree.h> | |||
21 | #include <sys/stat.h> | |||
22 | #include <sys/queue.h> | |||
23 | #include <sys/uio.h> | |||
24 | ||||
25 | #include <assert.h> | |||
26 | #include <err.h> | |||
27 | #include <errno(*__errno()).h> | |||
28 | #include <fcntl.h> | |||
29 | #include <stddef.h> | |||
30 | #include <stdint.h> | |||
31 | #include <stdio.h> | |||
32 | #include <stdlib.h> | |||
33 | #include <string.h> | |||
34 | #include <time.h> | |||
35 | #include <unistd.h> | |||
36 | ||||
37 | #include "btree.h" | |||
38 | ||||
39 | /* #define DEBUG */ | |||
40 | ||||
41 | #ifdef DEBUG | |||
42 | # define DPRINTF(...) do { fprintf(stderr(&__sF[2]), "%s:%d: ", __func__, __LINE__42); \ | |||
43 | fprintf(stderr(&__sF[2]), __VA_ARGS__); \ | |||
44 | fprintf(stderr(&__sF[2]), "\n"); } while(0) | |||
45 | #else | |||
46 | # define DPRINTF(...) | |||
47 | #endif | |||
48 | ||||
49 | #define PAGESIZE4096 4096 | |||
50 | #define BT_MINKEYS4 4 | |||
51 | #define BT_MAGIC0xB3DBB3DB 0xB3DBB3DB | |||
52 | #define BT_VERSION4 4 | |||
53 | #define MAXKEYSIZE255 255 | |||
54 | ||||
55 | #define P_INVALID0xFFFFFFFF 0xFFFFFFFF | |||
56 | ||||
57 | #define F_ISSET(w, f)(((w) & (f)) == (f)) (((w) & (f)) == (f)) | |||
58 | #define MINIMUM(a,b)((a) < (b) ? (a) : (b)) ((a) < (b) ? (a) : (b)) | |||
59 | ||||
60 | typedef uint32_t pgno_t; | |||
61 | typedef uint16_t indx_t; | |||
62 | ||||
63 | /* There are four page types: meta, index, leaf and overflow. | |||
64 | * They all share the same page header. | |||
65 | */ | |||
66 | struct page { /* represents an on-disk page */ | |||
67 | pgno_t pgno; /* page number */ | |||
68 | #define P_BRANCH0x01 0x01 /* branch page */ | |||
69 | #define P_LEAF0x02 0x02 /* leaf page */ | |||
70 | #define P_OVERFLOW0x04 0x04 /* overflow page */ | |||
71 | #define P_META0x08 0x08 /* meta page */ | |||
72 | #define P_HEAD0x10 0x10 /* header page */ | |||
73 | uint32_t flags; | |||
74 | #define lowerb.fb.fb_lower b.fb.fb_lower | |||
75 | #define upperb.fb.fb_upper b.fb.fb_upper | |||
76 | #define p_next_pgnob.pb_next_pgno b.pb_next_pgno | |||
77 | union page_bounds { | |||
78 | struct { | |||
79 | indx_t fb_lower; /* lower bound of free space */ | |||
80 | indx_t fb_upper; /* upper bound of free space */ | |||
81 | } fb; | |||
82 | pgno_t pb_next_pgno; /* overflow page linked list */ | |||
83 | } b; | |||
84 | indx_t ptrs[1]; /* dynamic size */ | |||
85 | } __packed__attribute__((__packed__)); | |||
86 | ||||
87 | #define PAGEHDRSZ__builtin_offsetof(struct page, ptrs) offsetof(struct page, ptrs)__builtin_offsetof(struct page, ptrs) | |||
88 | ||||
89 | #define NUMKEYSP(p)(((p)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1) (((p)->lowerb.fb.fb_lower - PAGEHDRSZ__builtin_offsetof(struct page, ptrs)) >> 1) | |||
90 | #define NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) (((mp)->page->lowerb.fb.fb_lower - PAGEHDRSZ__builtin_offsetof(struct page, ptrs)) >> 1) | |||
91 | #define SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page-> b.fb.fb_lower) (indx_t)((mp)->page->upperb.fb.fb_upper - (mp)->page->lowerb.fb.fb_lower) | |||
92 | #define PAGEFILL(bt, mp)(1000 * ((bt)->head.psize - __builtin_offsetof(struct page , ptrs) - (indx_t)((mp)->page->b.fb.fb_upper - (mp)-> page->b.fb.fb_lower)) / ((bt)->head.psize - __builtin_offsetof (struct page, ptrs))) (1000 * ((bt)->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs) - SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page-> b.fb.fb_lower)) / \ | |||
93 | ((bt)->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs))) | |||
94 | #define IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) F_ISSET((mp)->page->flags, P_LEAF)((((mp)->page->flags) & (0x02)) == (0x02)) | |||
95 | #define IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01)) F_ISSET((mp)->page->flags, P_BRANCH)((((mp)->page->flags) & (0x01)) == (0x01)) | |||
96 | #define IS_OVERFLOW(mp)((((mp)->page->flags) & (0x04)) == (0x04)) F_ISSET((mp)->page->flags, P_OVERFLOW)((((mp)->page->flags) & (0x04)) == (0x04)) | |||
97 | ||||
98 | struct bt_head { /* header page content */ | |||
99 | uint32_t magic; | |||
100 | uint32_t version; | |||
101 | uint32_t flags; | |||
102 | uint32_t psize; /* page size */ | |||
103 | } __packed__attribute__((__packed__)); | |||
104 | ||||
105 | struct bt_meta { /* meta (footer) page content */ | |||
106 | #define BT_TOMBSTONE0x01 0x01 /* file is replaced */ | |||
107 | uint32_t flags; | |||
108 | pgno_t root; /* page number of root page */ | |||
109 | pgno_t prev_meta; /* previous meta page number */ | |||
110 | time_t created_at; | |||
111 | uint32_t branch_pages; | |||
112 | uint32_t leaf_pages; | |||
113 | uint32_t overflow_pages; | |||
114 | uint32_t revisions; | |||
115 | uint32_t depth; | |||
116 | uint64_t entries; | |||
117 | unsigned char hash[SHA_DIGEST_LENGTH20]; | |||
118 | } __packed__attribute__((__packed__)); | |||
119 | ||||
120 | struct btkey { | |||
121 | size_t len; | |||
122 | char str[MAXKEYSIZE255]; | |||
123 | }; | |||
124 | ||||
125 | struct mpage { /* an in-memory cached page */ | |||
126 | RB_ENTRY(mpage)struct { struct mpage *rbe_left; struct mpage *rbe_right; struct mpage *rbe_parent; int rbe_color; } entry; /* page cache entry */ | |||
127 | SIMPLEQ_ENTRY(mpage)struct { struct mpage *sqe_next; } next; /* queue of dirty pages */ | |||
128 | TAILQ_ENTRY(mpage)struct { struct mpage *tqe_next; struct mpage **tqe_prev; } lru_next; /* LRU queue */ | |||
129 | struct mpage *parent; /* NULL if root */ | |||
130 | unsigned int parent_index; /* keep track of node index */ | |||
131 | struct btkey prefix; | |||
132 | struct page *page; | |||
133 | pgno_t pgno; /* copy of page->pgno */ | |||
134 | short ref; /* increased by cursors */ | |||
135 | short dirty; /* 1 if on dirty queue */ | |||
136 | }; | |||
137 | RB_HEAD(page_cache, mpage)struct page_cache { struct mpage *rbh_root; }; | |||
138 | SIMPLEQ_HEAD(dirty_queue, mpage)struct dirty_queue { struct mpage *sqh_first; struct mpage ** sqh_last; }; | |||
139 | TAILQ_HEAD(lru_queue, mpage)struct lru_queue { struct mpage *tqh_first; struct mpage **tqh_last ; }; | |||
140 | ||||
141 | static int mpage_cmp(struct mpage *a, struct mpage *b); | |||
142 | static struct mpage *mpage_lookup(struct btree *bt, pgno_t pgno); | |||
143 | static void mpage_add(struct btree *bt, struct mpage *mp); | |||
144 | static void mpage_free(struct mpage *mp); | |||
145 | static void mpage_del(struct btree *bt, struct mpage *mp); | |||
146 | static void mpage_flush(struct btree *bt); | |||
147 | static struct mpage *mpage_copy(struct btree *bt, struct mpage *mp); | |||
148 | static void mpage_prune(struct btree *bt); | |||
149 | static void mpage_dirty(struct btree *bt, struct mpage *mp); | |||
150 | static struct mpage *mpage_touch(struct btree *bt, struct mpage *mp); | |||
151 | ||||
152 | RB_PROTOTYPE(page_cache, mpage, entry, mpage_cmp)void page_cache_RB_INSERT_COLOR(struct page_cache *, struct mpage *); void page_cache_RB_REMOVE_COLOR(struct page_cache *, struct mpage *, struct mpage *); struct mpage *page_cache_RB_REMOVE (struct page_cache *, struct mpage *); struct mpage *page_cache_RB_INSERT (struct page_cache *, struct mpage *); struct mpage *page_cache_RB_FIND (struct page_cache *, struct mpage *); struct mpage *page_cache_RB_NFIND (struct page_cache *, struct mpage *); struct mpage *page_cache_RB_NEXT (struct mpage *); struct mpage *page_cache_RB_PREV(struct mpage *); struct mpage *page_cache_RB_MINMAX(struct page_cache *, int );; | |||
153 | RB_GENERATE(page_cache, mpage, entry, mpage_cmp)void page_cache_RB_INSERT_COLOR(struct page_cache *head, struct mpage *elm) { struct mpage *parent, *gparent, *tmp; while (( parent = (elm)->entry.rbe_parent) && (parent)-> entry.rbe_color == 1) { gparent = (parent)->entry.rbe_parent ; if (parent == (gparent)->entry.rbe_left) { tmp = (gparent )->entry.rbe_right; if (tmp && (tmp)->entry.rbe_color == 1) { (tmp)->entry.rbe_color = 0; do { (parent)->entry .rbe_color = 0; (gparent)->entry.rbe_color = 1; } while (0 ); elm = gparent; continue; } if ((parent)->entry.rbe_right == elm) { do { (tmp) = (parent)->entry.rbe_right; if (((parent )->entry.rbe_right = (tmp)->entry.rbe_left)) { ((tmp)-> entry.rbe_left)->entry.rbe_parent = (parent); } do {} while (0); if (((tmp)->entry.rbe_parent = (parent)->entry.rbe_parent )) { if ((parent) == ((parent)->entry.rbe_parent)->entry .rbe_left) ((parent)->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent)->entry.rbe_parent)->entry.rbe_right = (tmp); } else (head)->rbh_root = (tmp); (tmp)->entry .rbe_left = (parent); (parent)->entry.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while ( 0); } while (0); tmp = parent; parent = elm; elm = tmp; } do { (parent)->entry.rbe_color = 0; (gparent)->entry.rbe_color = 1; } while (0); do { (tmp) = (gparent)->entry.rbe_left; if (((gparent)->entry.rbe_left = (tmp)->entry.rbe_right )) { ((tmp)->entry.rbe_right)->entry.rbe_parent = (gparent ); } do {} while (0); if (((tmp)->entry.rbe_parent = (gparent )->entry.rbe_parent)) { if ((gparent) == ((gparent)->entry .rbe_parent)->entry.rbe_left) ((gparent)->entry.rbe_parent )->entry.rbe_left = (tmp); else ((gparent)->entry.rbe_parent )->entry.rbe_right = (tmp); } else (head)->rbh_root = ( tmp); (tmp)->entry.rbe_right = (gparent); (gparent)->entry .rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent )) do {} while (0); } while (0); } else { tmp = (gparent)-> entry.rbe_left; if (tmp && (tmp)->entry.rbe_color == 1) { (tmp)->entry.rbe_color = 0; do { (parent)->entry. rbe_color = 0; (gparent)->entry.rbe_color = 1; } while (0) ; elm = gparent; continue; } if ((parent)->entry.rbe_left == elm) { do { (tmp) = (parent)->entry.rbe_left; if (((parent )->entry.rbe_left = (tmp)->entry.rbe_right)) { ((tmp)-> entry.rbe_right)->entry.rbe_parent = (parent); } do {} while (0); if (((tmp)->entry.rbe_parent = (parent)->entry.rbe_parent )) { if ((parent) == ((parent)->entry.rbe_parent)->entry .rbe_left) ((parent)->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent)->entry.rbe_parent)->entry.rbe_right = (tmp); } else (head)->rbh_root = (tmp); (tmp)->entry .rbe_right = (parent); (parent)->entry.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while (0); } while (0); tmp = parent; parent = elm; elm = tmp; } do { (parent)->entry.rbe_color = 0; (gparent)->entry.rbe_color = 1; } while (0); do { (tmp) = (gparent)->entry.rbe_right ; if (((gparent)->entry.rbe_right = (tmp)->entry.rbe_left )) { ((tmp)->entry.rbe_left)->entry.rbe_parent = (gparent ); } do {} while (0); if (((tmp)->entry.rbe_parent = (gparent )->entry.rbe_parent)) { if ((gparent) == ((gparent)->entry .rbe_parent)->entry.rbe_left) ((gparent)->entry.rbe_parent )->entry.rbe_left = (tmp); else ((gparent)->entry.rbe_parent )->entry.rbe_right = (tmp); } else (head)->rbh_root = ( tmp); (tmp)->entry.rbe_left = (gparent); (gparent)->entry .rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent )) do {} while (0); } while (0); } } (head->rbh_root)-> entry.rbe_color = 0; } void page_cache_RB_REMOVE_COLOR(struct page_cache *head, struct mpage *parent, struct mpage *elm) { struct mpage *tmp; while ((elm == ((void *)0) || (elm)->entry .rbe_color == 0) && elm != (head)->rbh_root) { if ( (parent)->entry.rbe_left == elm) { tmp = (parent)->entry .rbe_right; if ((tmp)->entry.rbe_color == 1) { do { (tmp)-> entry.rbe_color = 0; (parent)->entry.rbe_color = 1; } while (0); do { (tmp) = (parent)->entry.rbe_right; if (((parent )->entry.rbe_right = (tmp)->entry.rbe_left)) { ((tmp)-> entry.rbe_left)->entry.rbe_parent = (parent); } do {} while (0); if (((tmp)->entry.rbe_parent = (parent)->entry.rbe_parent )) { if ((parent) == ((parent)->entry.rbe_parent)->entry .rbe_left) ((parent)->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent)->entry.rbe_parent)->entry.rbe_right = (tmp); } else (head)->rbh_root = (tmp); (tmp)->entry .rbe_left = (parent); (parent)->entry.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while ( 0); } while (0); tmp = (parent)->entry.rbe_right; } if ((( tmp)->entry.rbe_left == ((void *)0) || ((tmp)->entry.rbe_left )->entry.rbe_color == 0) && ((tmp)->entry.rbe_right == ((void *)0) || ((tmp)->entry.rbe_right)->entry.rbe_color == 0)) { (tmp)->entry.rbe_color = 1; elm = parent; parent = (elm)->entry.rbe_parent; } else { if ((tmp)->entry.rbe_right == ((void *)0) || ((tmp)->entry.rbe_right)->entry.rbe_color == 0) { struct mpage *oleft; if ((oleft = (tmp)->entry.rbe_left )) (oleft)->entry.rbe_color = 0; (tmp)->entry.rbe_color = 1; do { (oleft) = (tmp)->entry.rbe_left; if (((tmp)-> entry.rbe_left = (oleft)->entry.rbe_right)) { ((oleft)-> entry.rbe_right)->entry.rbe_parent = (tmp); } do {} while ( 0); if (((oleft)->entry.rbe_parent = (tmp)->entry.rbe_parent )) { if ((tmp) == ((tmp)->entry.rbe_parent)->entry.rbe_left ) ((tmp)->entry.rbe_parent)->entry.rbe_left = (oleft); else ((tmp)->entry.rbe_parent)->entry.rbe_right = (oleft); } else (head)->rbh_root = (oleft); (oleft)->entry.rbe_right = (tmp); (tmp)->entry.rbe_parent = (oleft); do {} while ( 0); if (((oleft)->entry.rbe_parent)) do {} while (0); } while (0); tmp = (parent)->entry.rbe_right; } (tmp)->entry.rbe_color = (parent)->entry.rbe_color; (parent)->entry.rbe_color = 0; if ((tmp)->entry.rbe_right) ((tmp)->entry.rbe_right )->entry.rbe_color = 0; do { (tmp) = (parent)->entry.rbe_right ; if (((parent)->entry.rbe_right = (tmp)->entry.rbe_left )) { ((tmp)->entry.rbe_left)->entry.rbe_parent = (parent ); } do {} while (0); if (((tmp)->entry.rbe_parent = (parent )->entry.rbe_parent)) { if ((parent) == ((parent)->entry .rbe_parent)->entry.rbe_left) ((parent)->entry.rbe_parent )->entry.rbe_left = (tmp); else ((parent)->entry.rbe_parent )->entry.rbe_right = (tmp); } else (head)->rbh_root = ( tmp); (tmp)->entry.rbe_left = (parent); (parent)->entry .rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent )) do {} while (0); } while (0); elm = (head)->rbh_root; break ; } } else { tmp = (parent)->entry.rbe_left; if ((tmp)-> entry.rbe_color == 1) { do { (tmp)->entry.rbe_color = 0; ( parent)->entry.rbe_color = 1; } while (0); do { (tmp) = (parent )->entry.rbe_left; if (((parent)->entry.rbe_left = (tmp )->entry.rbe_right)) { ((tmp)->entry.rbe_right)->entry .rbe_parent = (parent); } do {} while (0); if (((tmp)->entry .rbe_parent = (parent)->entry.rbe_parent)) { if ((parent) == ((parent)->entry.rbe_parent)->entry.rbe_left) ((parent )->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent )->entry.rbe_parent)->entry.rbe_right = (tmp); } else ( head)->rbh_root = (tmp); (tmp)->entry.rbe_right = (parent ); (parent)->entry.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while (0); } while (0); tmp = (parent)->entry.rbe_left; } if (((tmp)->entry.rbe_left == ((void *)0) || ((tmp)->entry.rbe_left)->entry.rbe_color == 0) && ((tmp)->entry.rbe_right == ((void *)0) || ((tmp)->entry.rbe_right)->entry.rbe_color == 0)) { (tmp )->entry.rbe_color = 1; elm = parent; parent = (elm)->entry .rbe_parent; } else { if ((tmp)->entry.rbe_left == ((void * )0) || ((tmp)->entry.rbe_left)->entry.rbe_color == 0) { struct mpage *oright; if ((oright = (tmp)->entry.rbe_right )) (oright)->entry.rbe_color = 0; (tmp)->entry.rbe_color = 1; do { (oright) = (tmp)->entry.rbe_right; if (((tmp)-> entry.rbe_right = (oright)->entry.rbe_left)) { ((oright)-> entry.rbe_left)->entry.rbe_parent = (tmp); } do {} while ( 0); if (((oright)->entry.rbe_parent = (tmp)->entry.rbe_parent )) { if ((tmp) == ((tmp)->entry.rbe_parent)->entry.rbe_left ) ((tmp)->entry.rbe_parent)->entry.rbe_left = (oright); else ((tmp)->entry.rbe_parent)->entry.rbe_right = (oright ); } else (head)->rbh_root = (oright); (oright)->entry. rbe_left = (tmp); (tmp)->entry.rbe_parent = (oright); do { } while (0); if (((oright)->entry.rbe_parent)) do {} while (0); } while (0); tmp = (parent)->entry.rbe_left; } (tmp) ->entry.rbe_color = (parent)->entry.rbe_color; (parent) ->entry.rbe_color = 0; if ((tmp)->entry.rbe_left) ((tmp )->entry.rbe_left)->entry.rbe_color = 0; do { (tmp) = ( parent)->entry.rbe_left; if (((parent)->entry.rbe_left = (tmp)->entry.rbe_right)) { ((tmp)->entry.rbe_right)-> entry.rbe_parent = (parent); } do {} while (0); if (((tmp)-> entry.rbe_parent = (parent)->entry.rbe_parent)) { if ((parent ) == ((parent)->entry.rbe_parent)->entry.rbe_left) ((parent )->entry.rbe_parent)->entry.rbe_left = (tmp); else ((parent )->entry.rbe_parent)->entry.rbe_right = (tmp); } else ( head)->rbh_root = (tmp); (tmp)->entry.rbe_right = (parent ); (parent)->entry.rbe_parent = (tmp); do {} while (0); if (((tmp)->entry.rbe_parent)) do {} while (0); } while (0); elm = (head)->rbh_root; break; } } } if (elm) (elm)->entry .rbe_color = 0; } struct mpage * page_cache_RB_REMOVE(struct page_cache *head, struct mpage *elm) { struct mpage *child, *parent, *old = elm; int color; if ((elm)->entry.rbe_left == ((void *)0 )) child = (elm)->entry.rbe_right; else if ((elm)->entry .rbe_right == ((void *)0)) child = (elm)->entry.rbe_left; else { struct mpage *left; elm = (elm)->entry.rbe_right; while ((left = (elm)->entry.rbe_left)) elm = left; child = (elm )->entry.rbe_right; parent = (elm)->entry.rbe_parent; color = (elm)->entry.rbe_color; if (child) (child)->entry.rbe_parent = parent; if (parent) { if ((parent)->entry.rbe_left == elm ) (parent)->entry.rbe_left = child; else (parent)->entry .rbe_right = child; do {} while (0); } else (head)->rbh_root = child; if ((elm)->entry.rbe_parent == old) parent = elm ; (elm)->entry = (old)->entry; if ((old)->entry.rbe_parent ) { if (((old)->entry.rbe_parent)->entry.rbe_left == old ) ((old)->entry.rbe_parent)->entry.rbe_left = elm; else ((old)->entry.rbe_parent)->entry.rbe_right = elm; do { } while (0); } else (head)->rbh_root = elm; ((old)->entry .rbe_left)->entry.rbe_parent = elm; if ((old)->entry.rbe_right ) ((old)->entry.rbe_right)->entry.rbe_parent = elm; if ( parent) { left = parent; do { do {} while (0); } while ((left = (left)->entry.rbe_parent)); } goto color; } parent = (elm )->entry.rbe_parent; color = (elm)->entry.rbe_color; if (child) (child)->entry.rbe_parent = parent; if (parent) { if ((parent)->entry.rbe_left == elm) (parent)->entry.rbe_left = child; else (parent)->entry.rbe_right = child; do {} while (0); } else (head)->rbh_root = child; color: if (color == 0) page_cache_RB_REMOVE_COLOR(head, parent, child); return ( old); } struct mpage * page_cache_RB_INSERT(struct page_cache *head, struct mpage *elm) { struct mpage *tmp; struct mpage * parent = ((void *)0); int comp = 0; tmp = (head)->rbh_root ; while (tmp) { parent = tmp; comp = (mpage_cmp)(elm, parent) ; if (comp < 0) tmp = (tmp)->entry.rbe_left; else if (comp > 0) tmp = (tmp)->entry.rbe_right; else return (tmp); } do { (elm)->entry.rbe_parent = parent; (elm)->entry.rbe_left = (elm)->entry.rbe_right = ((void *)0); (elm)->entry.rbe_color = 1; } while (0); if (parent != ((void *)0)) { if (comp < 0) (parent)->entry.rbe_left = elm; else (parent)->entry .rbe_right = elm; do {} while (0); } else (head)->rbh_root = elm; page_cache_RB_INSERT_COLOR(head, elm); return (((void *)0)); } struct mpage * page_cache_RB_FIND(struct page_cache *head, struct mpage *elm) { struct mpage *tmp = (head)->rbh_root ; int comp; while (tmp) { comp = mpage_cmp(elm, tmp); if (comp < 0) tmp = (tmp)->entry.rbe_left; else if (comp > 0 ) tmp = (tmp)->entry.rbe_right; else return (tmp); } return (((void *)0)); } struct mpage * page_cache_RB_NFIND(struct page_cache *head, struct mpage *elm) { struct mpage *tmp = (head)->rbh_root ; struct mpage *res = ((void *)0); int comp; while (tmp) { comp = mpage_cmp(elm, tmp); if (comp < 0) { res = tmp; tmp = ( tmp)->entry.rbe_left; } else if (comp > 0) tmp = (tmp)-> entry.rbe_right; else return (tmp); } return (res); } struct mpage * page_cache_RB_NEXT(struct mpage *elm) { if ((elm)->entry .rbe_right) { elm = (elm)->entry.rbe_right; while ((elm)-> entry.rbe_left) elm = (elm)->entry.rbe_left; } else { if ( (elm)->entry.rbe_parent && (elm == ((elm)->entry .rbe_parent)->entry.rbe_left)) elm = (elm)->entry.rbe_parent ; else { while ((elm)->entry.rbe_parent && (elm == ((elm)->entry.rbe_parent)->entry.rbe_right)) elm = (elm )->entry.rbe_parent; elm = (elm)->entry.rbe_parent; } } return (elm); } struct mpage * page_cache_RB_PREV(struct mpage *elm) { if ((elm)->entry.rbe_left) { elm = (elm)->entry .rbe_left; while ((elm)->entry.rbe_right) elm = (elm)-> entry.rbe_right; } else { if ((elm)->entry.rbe_parent && (elm == ((elm)->entry.rbe_parent)->entry.rbe_right)) elm = (elm)->entry.rbe_parent; else { while ((elm)->entry. rbe_parent && (elm == ((elm)->entry.rbe_parent)-> entry.rbe_left)) elm = (elm)->entry.rbe_parent; elm = (elm )->entry.rbe_parent; } } return (elm); } struct mpage * page_cache_RB_MINMAX (struct page_cache *head, int val) { struct mpage *tmp = (head )->rbh_root; struct mpage *parent = ((void *)0); while (tmp ) { parent = tmp; if (val < 0) tmp = (tmp)->entry.rbe_left ; else tmp = (tmp)->entry.rbe_right; } return (parent); }; | |||
154 | ||||
155 | struct ppage { /* ordered list of pages */ | |||
156 | SLIST_ENTRY(ppage)struct { struct ppage *sle_next; } entry; | |||
157 | struct mpage *mpage; | |||
158 | unsigned int ki; /* cursor index on page */ | |||
159 | }; | |||
160 | SLIST_HEAD(page_stack, ppage)struct page_stack { struct ppage *slh_first; }; | |||
161 | ||||
162 | #define CURSOR_EMPTY(c)(((&(c)->stack)->slh_first) == ((void *)0)) SLIST_EMPTY(&(c)->stack)(((&(c)->stack)->slh_first) == ((void *)0)) | |||
163 | #define CURSOR_TOP(c)((&(c)->stack)->slh_first) SLIST_FIRST(&(c)->stack)((&(c)->stack)->slh_first) | |||
164 | #define CURSOR_POP(c)do { (&(c)->stack)->slh_first = (&(c)->stack )->slh_first->entry.sle_next; } while (0) SLIST_REMOVE_HEAD(&(c)->stack, entry)do { (&(c)->stack)->slh_first = (&(c)->stack )->slh_first->entry.sle_next; } while (0) | |||
165 | #define CURSOR_PUSH(c,p)do { (p)->entry.sle_next = (&(c)->stack)->slh_first ; (&(c)->stack)->slh_first = (p); } while (0) SLIST_INSERT_HEAD(&(c)->stack, p, entry)do { (p)->entry.sle_next = (&(c)->stack)->slh_first ; (&(c)->stack)->slh_first = (p); } while (0) | |||
166 | ||||
167 | struct cursor { | |||
168 | struct btree *bt; | |||
169 | struct btree_txn *txn; | |||
170 | struct page_stack stack; /* stack of parent pages */ | |||
171 | short initialized; /* 1 if initialized */ | |||
172 | short eof; /* 1 if end is reached */ | |||
173 | }; | |||
174 | ||||
175 | #define METAHASHLEN__builtin_offsetof(struct bt_meta, hash) offsetof(struct bt_meta, hash)__builtin_offsetof(struct bt_meta, hash) | |||
176 | #define METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs))) ((void *)((char *)p + PAGEHDRSZ__builtin_offsetof(struct page, ptrs))) | |||
177 | ||||
178 | struct node { | |||
179 | #define n_pgnop.np_pgno p.np_pgno | |||
180 | #define n_dsizep.np_dsize p.np_dsize | |||
181 | union { | |||
182 | pgno_t np_pgno; /* child page number */ | |||
183 | uint32_t np_dsize; /* leaf data size */ | |||
184 | } p; | |||
185 | uint16_t ksize; /* key size */ | |||
186 | #define F_BIGDATA0x01 0x01 /* data put on overflow page */ | |||
187 | uint8_t flags; | |||
188 | char data[1]; | |||
189 | } __packed__attribute__((__packed__)); | |||
190 | ||||
191 | struct btree_txn { | |||
192 | pgno_t root; /* current / new root page */ | |||
193 | pgno_t next_pgno; /* next unallocated page */ | |||
194 | struct btree *bt; /* btree is ref'd */ | |||
195 | struct dirty_queue *dirty_queue; /* modified pages */ | |||
196 | #define BT_TXN_RDONLY0x01 0x01 /* read-only transaction */ | |||
197 | #define BT_TXN_ERROR0x02 0x02 /* an error has occurred */ | |||
198 | unsigned int flags; | |||
199 | }; | |||
200 | ||||
201 | struct btree { | |||
202 | int fd; | |||
203 | char *path; | |||
204 | #define BT_FIXPADDING0x01 0x01 /* internal */ | |||
205 | unsigned int flags; | |||
206 | bt_cmp_func cmp; /* user compare function */ | |||
207 | struct bt_head head; | |||
208 | struct bt_meta meta; | |||
209 | struct page_cache *page_cache; | |||
210 | struct lru_queue *lru_queue; | |||
211 | struct btree_txn *txn; /* current write transaction */ | |||
212 | int ref; /* increased by cursors & txn */ | |||
213 | struct btree_stat stat; | |||
214 | off_t size; /* current file size */ | |||
215 | }; | |||
216 | ||||
217 | #define NODESIZE__builtin_offsetof(struct node, data) offsetof(struct node, data)__builtin_offsetof(struct node, data) | |||
218 | ||||
219 | #define INDXSIZE(k)(__builtin_offsetof(struct node, data) + ((k) == ((void *)0) ? 0 : (k)->size)) (NODESIZE__builtin_offsetof(struct node, data) + ((k) == NULL((void *)0) ? 0 : (k)->size)) | |||
220 | #define LEAFSIZE(k, d)(__builtin_offsetof(struct node, data) + (k)->size + (d)-> size) (NODESIZE__builtin_offsetof(struct node, data) + (k)->size + (d)->size) | |||
221 | #define NODEPTRP(p, i)((struct node *)((char *)(p) + (p)->ptrs[i])) ((struct node *)((char *)(p) + (p)->ptrs[i])) | |||
222 | #define NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[i])) NODEPTRP((mp)->page, i)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[i])) | |||
223 | #define NODEKEY(node)(void *)((node)->data) (void *)((node)->data) | |||
224 | #define NODEDATA(node)(void *)((char *)(node)->data + (node)->ksize) (void *)((char *)(node)->data + (node)->ksize) | |||
225 | #define NODEPGNO(node)((node)->p.np_pgno) ((node)->p.np_pgno) | |||
226 | #define NODEDSZ(node)((node)->p.np_dsize) ((node)->p.np_dsize) | |||
227 | ||||
228 | #define BT_COMMIT_PAGES64 64 /* max number of pages to write in one commit */ | |||
229 | #define BT_MAXCACHE_DEF1024 1024 /* max number of pages to keep in cache */ | |||
230 | ||||
231 | static int btree_read_page(struct btree *bt, pgno_t pgno, | |||
232 | struct page *page); | |||
233 | static struct mpage *btree_get_mpage(struct btree *bt, pgno_t pgno); | |||
234 | static int btree_search_page_root(struct btree *bt, | |||
235 | struct mpage *root, struct btval *key, | |||
236 | struct cursor *cursor, int modify, | |||
237 | struct mpage **mpp); | |||
238 | static int btree_search_page(struct btree *bt, | |||
239 | struct btree_txn *txn, struct btval *key, | |||
240 | struct cursor *cursor, int modify, | |||
241 | struct mpage **mpp); | |||
242 | ||||
243 | static int btree_write_header(struct btree *bt, int fd); | |||
244 | static int btree_read_header(struct btree *bt); | |||
245 | static int btree_is_meta_page(struct page *p); | |||
246 | static int btree_read_meta(struct btree *bt, pgno_t *p_next); | |||
247 | static int btree_write_meta(struct btree *bt, pgno_t root, | |||
248 | unsigned int flags); | |||
249 | static void btree_ref(struct btree *bt); | |||
250 | ||||
251 | static struct node *btree_search_node(struct btree *bt, struct mpage *mp, | |||
252 | struct btval *key, int *exactp, unsigned int *kip); | |||
253 | static int btree_add_node(struct btree *bt, struct mpage *mp, | |||
254 | indx_t indx, struct btval *key, struct btval *data, | |||
255 | pgno_t pgno, uint8_t flags); | |||
256 | static void btree_del_node(struct btree *bt, struct mpage *mp, | |||
257 | indx_t indx); | |||
258 | static int btree_read_data(struct btree *bt, struct mpage *mp, | |||
259 | struct node *leaf, struct btval *data); | |||
260 | ||||
261 | static int btree_rebalance(struct btree *bt, struct mpage *mp); | |||
262 | static int btree_update_key(struct btree *bt, struct mpage *mp, | |||
263 | indx_t indx, struct btval *key); | |||
264 | static int btree_adjust_prefix(struct btree *bt, | |||
265 | struct mpage *src, int delta); | |||
266 | static int btree_move_node(struct btree *bt, struct mpage *src, | |||
267 | indx_t srcindx, struct mpage *dst, indx_t dstindx); | |||
268 | static int btree_merge(struct btree *bt, struct mpage *src, | |||
269 | struct mpage *dst); | |||
270 | static int btree_split(struct btree *bt, struct mpage **mpp, | |||
271 | unsigned int *newindxp, struct btval *newkey, | |||
272 | struct btval *newdata, pgno_t newpgno); | |||
273 | static struct mpage *btree_new_page(struct btree *bt, uint32_t flags); | |||
274 | static int btree_write_overflow_data(struct btree *bt, | |||
275 | struct page *p, struct btval *data); | |||
276 | ||||
277 | static void cursor_pop_page(struct cursor *cursor); | |||
278 | static struct ppage *cursor_push_page(struct cursor *cursor, | |||
279 | struct mpage *mp); | |||
280 | ||||
281 | static int bt_set_key(struct btree *bt, struct mpage *mp, | |||
282 | struct node *node, struct btval *key); | |||
283 | static int btree_sibling(struct cursor *cursor, int move_right); | |||
284 | static int btree_cursor_next(struct cursor *cursor, | |||
285 | struct btval *key, struct btval *data); | |||
286 | static int btree_cursor_set(struct cursor *cursor, | |||
287 | struct btval *key, struct btval *data, int *exactp); | |||
288 | static int btree_cursor_first(struct cursor *cursor, | |||
289 | struct btval *key, struct btval *data); | |||
290 | ||||
291 | static void bt_reduce_separator(struct btree *bt, struct node *min, | |||
292 | struct btval *sep); | |||
293 | static void remove_prefix(struct btree *bt, struct btval *key, | |||
294 | size_t pfxlen); | |||
295 | static void expand_prefix(struct btree *bt, struct mpage *mp, | |||
296 | indx_t indx, struct btkey *expkey); | |||
297 | static void concat_prefix(struct btree *bt, char *s1, size_t n1, | |||
298 | char *s2, size_t n2, char *cs, size_t *cn); | |||
299 | static void common_prefix(struct btree *bt, struct btkey *min, | |||
300 | struct btkey *max, struct btkey *pfx); | |||
301 | static void find_common_prefix(struct btree *bt, struct mpage *mp); | |||
302 | ||||
303 | static size_t bt_leaf_size(struct btree *bt, struct btval *key, | |||
304 | struct btval *data); | |||
305 | static size_t bt_branch_size(struct btree *bt, struct btval *key); | |||
306 | ||||
307 | static pgno_t btree_compact_tree(struct btree *bt, pgno_t pgno, | |||
308 | struct btree *btc); | |||
309 | ||||
310 | static int memncmp(const void *s1, size_t n1, | |||
311 | const void *s2, size_t n2); | |||
312 | static int memnrcmp(const void *s1, size_t n1, | |||
313 | const void *s2, size_t n2); | |||
314 | ||||
315 | static int | |||
316 | memncmp(const void *s1, size_t n1, const void *s2, size_t n2) | |||
317 | { | |||
318 | if (n1 < n2) { | |||
319 | if (memcmp(s1, s2, n1) == 0) | |||
320 | return -1; | |||
321 | } | |||
322 | else if (n1 > n2) { | |||
323 | if (memcmp(s1, s2, n2) == 0) | |||
324 | return 1; | |||
325 | } | |||
326 | return memcmp(s1, s2, n1); | |||
327 | } | |||
328 | ||||
329 | static int | |||
330 | memnrcmp(const void *s1, size_t n1, const void *s2, size_t n2) | |||
331 | { | |||
332 | const unsigned char *p1; | |||
333 | const unsigned char *p2; | |||
334 | ||||
335 | if (n1 == 0) | |||
336 | return n2 == 0 ? 0 : -1; | |||
337 | ||||
338 | if (n2 == 0) | |||
339 | return n1 == 0 ? 0 : 1; | |||
340 | ||||
341 | p1 = (const unsigned char *)s1 + n1 - 1; | |||
342 | p2 = (const unsigned char *)s2 + n2 - 1; | |||
343 | ||||
344 | while (*p1 == *p2) { | |||
345 | if (p1 == s1) | |||
346 | return (p2 == s2) ? 0 : -1; | |||
347 | if (p2 == s2) | |||
348 | return (p1 == p2) ? 0 : 1; | |||
349 | p1--; | |||
350 | p2--; | |||
351 | } | |||
352 | return *p1 - *p2; | |||
353 | } | |||
354 | ||||
355 | int | |||
356 | btree_cmp(struct btree *bt, const struct btval *a, const struct btval *b) | |||
357 | { | |||
358 | return bt->cmp(a, b); | |||
359 | } | |||
360 | ||||
361 | static void | |||
362 | common_prefix(struct btree *bt, struct btkey *min, struct btkey *max, | |||
363 | struct btkey *pfx) | |||
364 | { | |||
365 | size_t n = 0; | |||
366 | char *p1; | |||
367 | char *p2; | |||
368 | ||||
369 | if (min->len == 0 || max->len == 0) { | |||
370 | pfx->len = 0; | |||
371 | return; | |||
372 | } | |||
373 | ||||
374 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) { | |||
375 | p1 = min->str + min->len - 1; | |||
376 | p2 = max->str + max->len - 1; | |||
377 | ||||
378 | while (*p1 == *p2) { | |||
379 | if (p1 < min->str || p2 < max->str) | |||
380 | break; | |||
381 | p1--; | |||
382 | p2--; | |||
383 | n++; | |||
384 | } | |||
385 | ||||
386 | assert(n <= (int)sizeof(pfx->str))((n <= (int)sizeof(pfx->str)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 386, __func__, "n <= (int)sizeof(pfx->str)")); | |||
387 | pfx->len = n; | |||
388 | bcopy(p2 + 1, pfx->str, n); | |||
389 | } else { | |||
390 | p1 = min->str; | |||
391 | p2 = max->str; | |||
392 | ||||
393 | while (*p1 == *p2) { | |||
394 | if (n == min->len || n == max->len) | |||
395 | break; | |||
396 | p1++; | |||
397 | p2++; | |||
398 | n++; | |||
399 | } | |||
400 | ||||
401 | assert(n <= (int)sizeof(pfx->str))((n <= (int)sizeof(pfx->str)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 401, __func__, "n <= (int)sizeof(pfx->str)")); | |||
402 | pfx->len = n; | |||
403 | bcopy(max->str, pfx->str, n); | |||
404 | } | |||
405 | } | |||
406 | ||||
407 | static void | |||
408 | remove_prefix(struct btree *bt, struct btval *key, size_t pfxlen) | |||
409 | { | |||
410 | if (pfxlen == 0 || bt->cmp != NULL((void *)0)) | |||
411 | return; | |||
412 | ||||
413 | DPRINTF("removing %zu bytes of prefix from key [%.*s]", pfxlen, | |||
414 | (int)key->size, (char *)key->data); | |||
415 | assert(pfxlen <= key->size)((pfxlen <= key->size) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 415, __func__, "pfxlen <= key->size")); | |||
416 | key->size -= pfxlen; | |||
417 | if (!F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) | |||
418 | key->data = (char *)key->data + pfxlen; | |||
419 | } | |||
420 | ||||
421 | static void | |||
422 | expand_prefix(struct btree *bt, struct mpage *mp, indx_t indx, | |||
423 | struct btkey *expkey) | |||
424 | { | |||
425 | struct node *node; | |||
426 | ||||
427 | node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[indx])); | |||
428 | expkey->len = sizeof(expkey->str); | |||
429 | concat_prefix(bt, mp->prefix.str, mp->prefix.len, | |||
430 | NODEKEY(node)(void *)((node)->data), node->ksize, expkey->str, &expkey->len); | |||
431 | } | |||
432 | ||||
433 | static int | |||
434 | bt_cmp(struct btree *bt, const struct btval *key1, const struct btval *key2, | |||
435 | struct btkey *pfx) | |||
436 | { | |||
437 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) | |||
438 | return memnrcmp(key1->data, key1->size - pfx->len, | |||
439 | key2->data, key2->size); | |||
440 | else | |||
441 | return memncmp((char *)key1->data + pfx->len, key1->size - pfx->len, | |||
442 | key2->data, key2->size); | |||
443 | } | |||
444 | ||||
445 | void | |||
446 | btval_reset(struct btval *btv) | |||
447 | { | |||
448 | if (btv) { | |||
449 | if (btv->mp) | |||
450 | btv->mp->ref--; | |||
451 | if (btv->free_data) | |||
452 | free(btv->data); | |||
453 | memset(btv, 0, sizeof(*btv)); | |||
454 | } | |||
455 | } | |||
456 | ||||
457 | static int | |||
458 | mpage_cmp(struct mpage *a, struct mpage *b) | |||
459 | { | |||
460 | if (a->pgno > b->pgno) | |||
461 | return 1; | |||
462 | if (a->pgno < b->pgno) | |||
463 | return -1; | |||
464 | return 0; | |||
465 | } | |||
466 | ||||
467 | static struct mpage * | |||
468 | mpage_lookup(struct btree *bt, pgno_t pgno) | |||
469 | { | |||
470 | struct mpage find, *mp; | |||
471 | ||||
472 | find.pgno = pgno; | |||
473 | mp = RB_FIND(page_cache, bt->page_cache, &find)page_cache_RB_FIND(bt->page_cache, &find); | |||
474 | if (mp) { | |||
475 | bt->stat.hits++; | |||
476 | /* Update LRU queue. Move page to the end. */ | |||
477 | TAILQ_REMOVE(bt->lru_queue, mp, lru_next)do { if (((mp)->lru_next.tqe_next) != ((void *)0)) (mp)-> lru_next.tqe_next->lru_next.tqe_prev = (mp)->lru_next.tqe_prev ; else (bt->lru_queue)->tqh_last = (mp)->lru_next.tqe_prev ; *(mp)->lru_next.tqe_prev = (mp)->lru_next.tqe_next; ; ; } while (0); | |||
478 | TAILQ_INSERT_TAIL(bt->lru_queue, mp, lru_next)do { (mp)->lru_next.tqe_next = ((void *)0); (mp)->lru_next .tqe_prev = (bt->lru_queue)->tqh_last; *(bt->lru_queue )->tqh_last = (mp); (bt->lru_queue)->tqh_last = & (mp)->lru_next.tqe_next; } while (0); | |||
479 | } | |||
480 | return mp; | |||
481 | } | |||
482 | ||||
483 | static void | |||
484 | mpage_add(struct btree *bt, struct mpage *mp) | |||
485 | { | |||
486 | assert(RB_INSERT(page_cache, bt->page_cache, mp) == NULL)((page_cache_RB_INSERT(bt->page_cache, mp) == ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 486 , __func__, "RB_INSERT(page_cache, bt->page_cache, mp) == NULL" )); | |||
487 | bt->stat.cache_size++; | |||
488 | TAILQ_INSERT_TAIL(bt->lru_queue, mp, lru_next)do { (mp)->lru_next.tqe_next = ((void *)0); (mp)->lru_next .tqe_prev = (bt->lru_queue)->tqh_last; *(bt->lru_queue )->tqh_last = (mp); (bt->lru_queue)->tqh_last = & (mp)->lru_next.tqe_next; } while (0); | |||
489 | } | |||
490 | ||||
491 | static void | |||
492 | mpage_free(struct mpage *mp) | |||
493 | { | |||
494 | if (mp != NULL((void *)0)) { | |||
495 | free(mp->page); | |||
496 | free(mp); | |||
497 | } | |||
498 | } | |||
499 | ||||
500 | static void | |||
501 | mpage_del(struct btree *bt, struct mpage *mp) | |||
502 | { | |||
503 | assert(RB_REMOVE(page_cache, bt->page_cache, mp) == mp)((page_cache_RB_REMOVE(bt->page_cache, mp) == mp) ? (void) 0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 503, __func__ , "RB_REMOVE(page_cache, bt->page_cache, mp) == mp")); | |||
504 | assert(bt->stat.cache_size > 0)((bt->stat.cache_size > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 504, __func__, "bt->stat.cache_size > 0")); | |||
505 | bt->stat.cache_size--; | |||
506 | TAILQ_REMOVE(bt->lru_queue, mp, lru_next)do { if (((mp)->lru_next.tqe_next) != ((void *)0)) (mp)-> lru_next.tqe_next->lru_next.tqe_prev = (mp)->lru_next.tqe_prev ; else (bt->lru_queue)->tqh_last = (mp)->lru_next.tqe_prev ; *(mp)->lru_next.tqe_prev = (mp)->lru_next.tqe_next; ; ; } while (0); | |||
507 | } | |||
508 | ||||
509 | static void | |||
510 | mpage_flush(struct btree *bt) | |||
511 | { | |||
512 | struct mpage *mp; | |||
513 | ||||
514 | while ((mp = RB_MIN(page_cache, bt->page_cache)page_cache_RB_MINMAX(bt->page_cache, -1)) != NULL((void *)0)) { | |||
515 | mpage_del(bt, mp); | |||
516 | mpage_free(mp); | |||
517 | } | |||
518 | } | |||
519 | ||||
520 | static struct mpage * | |||
521 | mpage_copy(struct btree *bt, struct mpage *mp) | |||
522 | { | |||
523 | struct mpage *copy; | |||
524 | ||||
525 | if ((copy = calloc(1, sizeof(*copy))) == NULL((void *)0)) | |||
526 | return NULL((void *)0); | |||
527 | if ((copy->page = malloc(bt->head.psize)) == NULL((void *)0)) { | |||
528 | free(copy); | |||
529 | return NULL((void *)0); | |||
530 | } | |||
531 | bcopy(mp->page, copy->page, bt->head.psize); | |||
532 | bcopy(&mp->prefix, ©->prefix, sizeof(mp->prefix)); | |||
533 | copy->parent = mp->parent; | |||
534 | copy->parent_index = mp->parent_index; | |||
535 | copy->pgno = mp->pgno; | |||
536 | ||||
537 | return copy; | |||
538 | } | |||
539 | ||||
540 | /* Remove the least recently used memory pages until the cache size is | |||
541 | * within the configured bounds. Pages referenced by cursors or returned | |||
542 | * key/data are not pruned. | |||
543 | */ | |||
544 | static void | |||
545 | mpage_prune(struct btree *bt) | |||
546 | { | |||
547 | struct mpage *mp, *next; | |||
548 | ||||
549 | for (mp = TAILQ_FIRST(bt->lru_queue)((bt->lru_queue)->tqh_first); mp; mp = next) { | |||
550 | if (bt->stat.cache_size <= bt->stat.max_cache) | |||
551 | break; | |||
552 | next = TAILQ_NEXT(mp, lru_next)((mp)->lru_next.tqe_next); | |||
553 | if (!mp->dirty && mp->ref <= 0) { | |||
554 | mpage_del(bt, mp); | |||
555 | mpage_free(mp); | |||
556 | } | |||
557 | } | |||
558 | } | |||
559 | ||||
560 | /* Mark a page as dirty and push it on the dirty queue. | |||
561 | */ | |||
562 | static void | |||
563 | mpage_dirty(struct btree *bt, struct mpage *mp) | |||
564 | { | |||
565 | assert(bt != NULL)((bt != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 565, __func__, "bt != NULL")); | |||
566 | assert(bt->txn != NULL)((bt->txn != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 566, __func__, "bt->txn != NULL")); | |||
567 | ||||
568 | if (!mp->dirty) { | |||
569 | mp->dirty = 1; | |||
570 | SIMPLEQ_INSERT_TAIL(bt->txn->dirty_queue, mp, next)do { (mp)->next.sqe_next = ((void *)0); *(bt->txn->dirty_queue )->sqh_last = (mp); (bt->txn->dirty_queue)->sqh_last = &(mp)->next.sqe_next; } while (0); | |||
571 | } | |||
572 | } | |||
573 | ||||
574 | /* Touch a page: make it dirty and re-insert into tree with updated pgno. | |||
575 | */ | |||
576 | static struct mpage * | |||
577 | mpage_touch(struct btree *bt, struct mpage *mp) | |||
578 | { | |||
579 | assert(bt != NULL)((bt != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 579, __func__, "bt != NULL")); | |||
580 | assert(bt->txn != NULL)((bt->txn != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 580, __func__, "bt->txn != NULL")); | |||
581 | assert(mp != NULL)((mp != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 581, __func__, "mp != NULL")); | |||
582 | ||||
583 | if (!mp->dirty) { | |||
584 | DPRINTF("touching page %u -> %u", mp->pgno, bt->txn->next_pgno); | |||
585 | if (mp->ref == 0) | |||
586 | mpage_del(bt, mp); | |||
587 | else { | |||
588 | if ((mp = mpage_copy(bt, mp)) == NULL((void *)0)) | |||
589 | return NULL((void *)0); | |||
590 | } | |||
591 | mp->pgno = mp->page->pgno = bt->txn->next_pgno++; | |||
592 | mpage_dirty(bt, mp); | |||
593 | mpage_add(bt, mp); | |||
594 | ||||
595 | /* Update the page number to new touched page. */ | |||
596 | if (mp->parent != NULL((void *)0)) | |||
597 | NODEPGNO(NODEPTR(mp->parent,((((struct node *)((char *)((mp->parent)->page) + ((mp-> parent)->page)->ptrs[mp->parent_index])))->p.np_pgno ) | |||
598 | mp->parent_index))((((struct node *)((char *)((mp->parent)->page) + ((mp-> parent)->page)->ptrs[mp->parent_index])))->p.np_pgno ) = mp->pgno; | |||
599 | } | |||
600 | ||||
601 | return mp; | |||
602 | } | |||
603 | ||||
604 | static int | |||
605 | btree_read_page(struct btree *bt, pgno_t pgno, struct page *page) | |||
606 | { | |||
607 | ssize_t rc; | |||
608 | ||||
609 | DPRINTF("reading page %u", pgno); | |||
610 | bt->stat.reads++; | |||
611 | if ((rc = pread(bt->fd, page, bt->head.psize, (off_t)pgno*bt->head.psize)) == 0) { | |||
612 | DPRINTF("page %u doesn't exist", pgno); | |||
613 | errno(*__errno()) = ENOENT2; | |||
614 | return BT_FAIL-1; | |||
615 | } else if (rc != (ssize_t)bt->head.psize) { | |||
616 | if (rc > 0) | |||
617 | errno(*__errno()) = EINVAL22; | |||
618 | DPRINTF("read: %s", strerror(errno)); | |||
619 | return BT_FAIL-1; | |||
620 | } | |||
621 | ||||
622 | if (page->pgno != pgno) { | |||
623 | DPRINTF("page numbers don't match: %u != %u", pgno, page->pgno); | |||
624 | errno(*__errno()) = EINVAL22; | |||
625 | return BT_FAIL-1; | |||
626 | } | |||
627 | ||||
628 | DPRINTF("page %u has flags 0x%X", pgno, page->flags); | |||
629 | ||||
630 | return BT_SUCCESS0; | |||
631 | } | |||
632 | ||||
633 | int | |||
634 | btree_sync(struct btree *bt) | |||
635 | { | |||
636 | if (!F_ISSET(bt->flags, BT_NOSYNC)(((bt->flags) & (0x02)) == (0x02))) | |||
637 | return fsync(bt->fd); | |||
638 | return 0; | |||
639 | } | |||
640 | ||||
641 | struct btree_txn * | |||
642 | btree_txn_begin(struct btree *bt, int rdonly) | |||
643 | { | |||
644 | struct btree_txn *txn; | |||
645 | ||||
646 | if (!rdonly && bt->txn != NULL((void *)0)) { | |||
647 | DPRINTF("write transaction already begun"); | |||
648 | errno(*__errno()) = EBUSY16; | |||
649 | return NULL((void *)0); | |||
650 | } | |||
651 | ||||
652 | if ((txn = calloc(1, sizeof(*txn))) == NULL((void *)0)) { | |||
653 | DPRINTF("calloc: %s", strerror(errno)); | |||
654 | return NULL((void *)0); | |||
655 | } | |||
656 | ||||
657 | if (rdonly) { | |||
658 | txn->flags |= BT_TXN_RDONLY0x01; | |||
659 | } else { | |||
660 | txn->dirty_queue = calloc(1, sizeof(*txn->dirty_queue)); | |||
661 | if (txn->dirty_queue == NULL((void *)0)) { | |||
662 | free(txn); | |||
663 | return NULL((void *)0); | |||
664 | } | |||
665 | SIMPLEQ_INIT(txn->dirty_queue)do { (txn->dirty_queue)->sqh_first = ((void *)0); (txn-> dirty_queue)->sqh_last = &(txn->dirty_queue)->sqh_first ; } while (0); | |||
666 | ||||
667 | DPRINTF("taking write lock on txn %p", txn); | |||
668 | if (flock(bt->fd, LOCK_EX0x02 | LOCK_NB0x04) != 0) { | |||
669 | DPRINTF("flock: %s", strerror(errno)); | |||
670 | errno(*__errno()) = EBUSY16; | |||
671 | free(txn->dirty_queue); | |||
672 | free(txn); | |||
673 | return NULL((void *)0); | |||
674 | } | |||
675 | bt->txn = txn; | |||
676 | } | |||
677 | ||||
678 | txn->bt = bt; | |||
679 | btree_ref(bt); | |||
680 | ||||
681 | if (btree_read_meta(bt, &txn->next_pgno) != BT_SUCCESS0) { | |||
682 | btree_txn_abort(txn); | |||
683 | return NULL((void *)0); | |||
684 | } | |||
685 | ||||
686 | txn->root = bt->meta.root; | |||
687 | DPRINTF("begin transaction on btree %p, root page %u", bt, txn->root); | |||
688 | ||||
689 | return txn; | |||
690 | } | |||
691 | ||||
692 | void | |||
693 | btree_txn_abort(struct btree_txn *txn) | |||
694 | { | |||
695 | struct mpage *mp; | |||
696 | struct btree *bt; | |||
697 | ||||
698 | if (txn == NULL((void *)0)) | |||
699 | return; | |||
700 | ||||
701 | bt = txn->bt; | |||
702 | DPRINTF("abort transaction on btree %p, root page %u", bt, txn->root); | |||
703 | ||||
704 | if (!F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) { | |||
705 | /* Discard all dirty pages. | |||
706 | */ | |||
707 | while (!SIMPLEQ_EMPTY(txn->dirty_queue)(((txn->dirty_queue)->sqh_first) == ((void *)0))) { | |||
708 | mp = SIMPLEQ_FIRST(txn->dirty_queue)((txn->dirty_queue)->sqh_first); | |||
709 | assert(mp->ref == 0)((mp->ref == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 709, __func__, "mp->ref == 0")); /* cursors should be closed */ | |||
710 | mpage_del(bt, mp); | |||
711 | SIMPLEQ_REMOVE_HEAD(txn->dirty_queue, next)do { if (((txn->dirty_queue)->sqh_first = (txn->dirty_queue )->sqh_first->next.sqe_next) == ((void *)0)) (txn->dirty_queue )->sqh_last = &(txn->dirty_queue)->sqh_first; } while (0); | |||
712 | mpage_free(mp); | |||
713 | } | |||
714 | ||||
715 | DPRINTF("releasing write lock on txn %p", txn); | |||
716 | txn->bt->txn = NULL((void *)0); | |||
717 | if (flock(txn->bt->fd, LOCK_UN0x08) != 0) { | |||
718 | DPRINTF("failed to unlock fd %d: %s", | |||
719 | txn->bt->fd, strerror(errno)); | |||
720 | } | |||
721 | free(txn->dirty_queue); | |||
722 | } | |||
723 | ||||
724 | btree_close(txn->bt); | |||
725 | free(txn); | |||
726 | } | |||
727 | ||||
728 | int | |||
729 | btree_txn_commit(struct btree_txn *txn) | |||
730 | { | |||
731 | int n, done; | |||
732 | ssize_t rc; | |||
733 | off_t size; | |||
734 | struct mpage *mp; | |||
735 | struct btree *bt; | |||
736 | struct iovec iov[BT_COMMIT_PAGES64]; | |||
737 | ||||
738 | assert(txn != NULL)((txn != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 738, __func__, "txn != NULL")); | |||
739 | assert(txn->bt != NULL)((txn->bt != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 739, __func__, "txn->bt != NULL")); | |||
740 | ||||
741 | bt = txn->bt; | |||
742 | ||||
743 | if (F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) { | |||
744 | DPRINTF("attempt to commit read-only transaction"); | |||
745 | btree_txn_abort(txn); | |||
746 | errno(*__errno()) = EPERM1; | |||
747 | return BT_FAIL-1; | |||
748 | } | |||
749 | ||||
750 | if (txn != bt->txn) { | |||
751 | DPRINTF("attempt to commit unknown transaction"); | |||
752 | btree_txn_abort(txn); | |||
753 | errno(*__errno()) = EINVAL22; | |||
754 | return BT_FAIL-1; | |||
755 | } | |||
756 | ||||
757 | if (F_ISSET(txn->flags, BT_TXN_ERROR)(((txn->flags) & (0x02)) == (0x02))) { | |||
758 | DPRINTF("error flag is set, can't commit"); | |||
759 | btree_txn_abort(txn); | |||
760 | errno(*__errno()) = EINVAL22; | |||
761 | return BT_FAIL-1; | |||
762 | } | |||
763 | ||||
764 | if (SIMPLEQ_EMPTY(txn->dirty_queue)(((txn->dirty_queue)->sqh_first) == ((void *)0))) | |||
765 | goto done; | |||
766 | ||||
767 | if (F_ISSET(bt->flags, BT_FIXPADDING)(((bt->flags) & (0x01)) == (0x01))) { | |||
768 | size = lseek(bt->fd, 0, SEEK_END2); | |||
769 | size += bt->head.psize - (size % bt->head.psize); | |||
770 | DPRINTF("extending to multiple of page size: %llu", size); | |||
771 | if (ftruncate(bt->fd, size) != 0) { | |||
772 | DPRINTF("ftruncate: %s", strerror(errno)); | |||
773 | btree_txn_abort(txn); | |||
774 | return BT_FAIL-1; | |||
775 | } | |||
776 | bt->flags &= ~BT_FIXPADDING0x01; | |||
777 | } | |||
778 | ||||
779 | DPRINTF("committing transaction on btree %p, root page %u", | |||
780 | bt, txn->root); | |||
781 | ||||
782 | /* Commit up to BT_COMMIT_PAGES dirty pages to disk until done. | |||
783 | */ | |||
784 | do { | |||
785 | n = 0; | |||
786 | done = 1; | |||
787 | SIMPLEQ_FOREACH(mp, txn->dirty_queue, next)for((mp) = ((txn->dirty_queue)->sqh_first); (mp) != ((void *)0); (mp) = ((mp)->next.sqe_next)) { | |||
788 | DPRINTF("committing page %u", mp->pgno); | |||
789 | iov[n].iov_len = bt->head.psize; | |||
790 | iov[n].iov_base = mp->page; | |||
791 | if (++n >= BT_COMMIT_PAGES64) { | |||
792 | done = 0; | |||
793 | break; | |||
794 | } | |||
795 | } | |||
796 | ||||
797 | if (n == 0) | |||
798 | break; | |||
799 | ||||
800 | DPRINTF("committing %u dirty pages", n); | |||
801 | rc = writev(bt->fd, iov, n); | |||
802 | if (rc != (ssize_t)bt->head.psize*n) { | |||
803 | if (rc > 0) | |||
804 | DPRINTF("short write, filesystem full?"); | |||
805 | else | |||
806 | DPRINTF("writev: %s", strerror(errno)); | |||
807 | btree_txn_abort(txn); | |||
808 | return BT_FAIL-1; | |||
809 | } | |||
810 | ||||
811 | /* Remove the dirty flag from the written pages. | |||
812 | */ | |||
813 | while (!SIMPLEQ_EMPTY(txn->dirty_queue)(((txn->dirty_queue)->sqh_first) == ((void *)0))) { | |||
814 | mp = SIMPLEQ_FIRST(txn->dirty_queue)((txn->dirty_queue)->sqh_first); | |||
815 | mp->dirty = 0; | |||
816 | SIMPLEQ_REMOVE_HEAD(txn->dirty_queue, next)do { if (((txn->dirty_queue)->sqh_first = (txn->dirty_queue )->sqh_first->next.sqe_next) == ((void *)0)) (txn->dirty_queue )->sqh_last = &(txn->dirty_queue)->sqh_first; } while (0); | |||
817 | if (--n == 0) | |||
818 | break; | |||
819 | } | |||
820 | } while (!done); | |||
821 | ||||
822 | if (btree_sync(bt) != 0 || | |||
823 | btree_write_meta(bt, txn->root, 0) != BT_SUCCESS0 || | |||
824 | btree_sync(bt) != 0) { | |||
825 | btree_txn_abort(txn); | |||
826 | return BT_FAIL-1; | |||
827 | } | |||
828 | ||||
829 | done: | |||
830 | mpage_prune(bt); | |||
831 | btree_txn_abort(txn); | |||
832 | ||||
833 | return BT_SUCCESS0; | |||
834 | } | |||
835 | ||||
836 | static int | |||
837 | btree_write_header(struct btree *bt, int fd) | |||
838 | { | |||
839 | struct stat sb; | |||
840 | struct bt_head *h; | |||
841 | struct page *p; | |||
842 | ssize_t rc; | |||
843 | unsigned int psize; | |||
844 | ||||
845 | DPRINTF("writing header page"); | |||
846 | assert(bt != NULL)((bt != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 846, __func__, "bt != NULL")); | |||
847 | ||||
848 | /* | |||
849 | * Ask stat for 'optimal blocksize for I/O', but cap to fit in indx_t. | |||
850 | */ | |||
851 | if (fstat(fd, &sb) == 0) | |||
852 | psize = MINIMUM(32*1024, sb.st_blksize)((32*1024) < (sb.st_blksize) ? (32*1024) : (sb.st_blksize) ); | |||
853 | else | |||
854 | psize = PAGESIZE4096; | |||
855 | ||||
856 | if ((p = calloc(1, psize)) == NULL((void *)0)) | |||
857 | return -1; | |||
858 | p->flags = P_HEAD0x10; | |||
859 | ||||
860 | h = METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs))); | |||
861 | h->magic = BT_MAGIC0xB3DBB3DB; | |||
862 | h->version = BT_VERSION4; | |||
863 | h->psize = psize; | |||
864 | bcopy(h, &bt->head, sizeof(*h)); | |||
865 | ||||
866 | rc = write(fd, p, bt->head.psize); | |||
867 | free(p); | |||
868 | if (rc != (ssize_t)bt->head.psize) { | |||
869 | if (rc > 0) | |||
870 | DPRINTF("short write, filesystem full?"); | |||
871 | return BT_FAIL-1; | |||
872 | } | |||
873 | ||||
874 | return BT_SUCCESS0; | |||
875 | } | |||
876 | ||||
877 | static int | |||
878 | btree_read_header(struct btree *bt) | |||
879 | { | |||
880 | char page[PAGESIZE4096]; | |||
881 | struct page *p; | |||
882 | struct bt_head *h; | |||
883 | int rc; | |||
884 | ||||
885 | assert(bt != NULL)((bt != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 885, __func__, "bt != NULL")); | |||
886 | ||||
887 | /* We don't know the page size yet, so use a minimum value. | |||
888 | */ | |||
889 | ||||
890 | if ((rc = pread(bt->fd, page, PAGESIZE4096, 0)) == 0) { | |||
891 | errno(*__errno()) = ENOENT2; | |||
892 | return -1; | |||
893 | } else if (rc != PAGESIZE4096) { | |||
894 | if (rc > 0) | |||
895 | errno(*__errno()) = EINVAL22; | |||
896 | DPRINTF("read: %s", strerror(errno)); | |||
897 | return -1; | |||
898 | } | |||
899 | ||||
900 | p = (struct page *)page; | |||
901 | ||||
902 | if (!F_ISSET(p->flags, P_HEAD)(((p->flags) & (0x10)) == (0x10))) { | |||
903 | DPRINTF("page %d not a header page", p->pgno); | |||
904 | errno(*__errno()) = EINVAL22; | |||
905 | return -1; | |||
906 | } | |||
907 | ||||
908 | h = METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs))); | |||
909 | if (h->magic != BT_MAGIC0xB3DBB3DB) { | |||
910 | DPRINTF("header has invalid magic"); | |||
911 | errno(*__errno()) = EINVAL22; | |||
912 | return -1; | |||
913 | } | |||
914 | ||||
915 | if (h->version != BT_VERSION4) { | |||
916 | DPRINTF("database is version %u, expected version %u", | |||
917 | bt->head.version, BT_VERSION); | |||
918 | errno(*__errno()) = EINVAL22; | |||
919 | return -1; | |||
920 | } | |||
921 | ||||
922 | bcopy(h, &bt->head, sizeof(*h)); | |||
923 | return 0; | |||
924 | } | |||
925 | ||||
926 | static int | |||
927 | btree_write_meta(struct btree *bt, pgno_t root, unsigned int flags) | |||
928 | { | |||
929 | struct mpage *mp; | |||
930 | struct bt_meta *meta; | |||
931 | ssize_t rc; | |||
932 | ||||
933 | DPRINTF("writing meta page for root page %u", root); | |||
934 | ||||
935 | assert(bt != NULL)((bt != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 935, __func__, "bt != NULL")); | |||
936 | assert(bt->txn != NULL)((bt->txn != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 936, __func__, "bt->txn != NULL")); | |||
937 | ||||
938 | if ((mp = btree_new_page(bt, P_META0x08)) == NULL((void *)0)) | |||
939 | return -1; | |||
940 | ||||
941 | bt->meta.prev_meta = bt->meta.root; | |||
942 | bt->meta.root = root; | |||
943 | bt->meta.flags = flags; | |||
944 | bt->meta.created_at = time(0); | |||
945 | bt->meta.revisions++; | |||
946 | SHA1((unsigned char *)&bt->meta, METAHASHLEN__builtin_offsetof(struct bt_meta, hash), bt->meta.hash); | |||
947 | ||||
948 | /* Copy the meta data changes to the new meta page. */ | |||
949 | meta = METADATA(mp->page)((void *)((char *)mp->page + __builtin_offsetof(struct page , ptrs))); | |||
950 | bcopy(&bt->meta, meta, sizeof(*meta)); | |||
951 | ||||
952 | rc = write(bt->fd, mp->page, bt->head.psize); | |||
953 | mp->dirty = 0; | |||
954 | SIMPLEQ_REMOVE_HEAD(bt->txn->dirty_queue, next)do { if (((bt->txn->dirty_queue)->sqh_first = (bt-> txn->dirty_queue)->sqh_first->next.sqe_next) == ((void *)0)) (bt->txn->dirty_queue)->sqh_last = &(bt-> txn->dirty_queue)->sqh_first; } while (0); | |||
955 | if (rc != (ssize_t)bt->head.psize) { | |||
956 | if (rc > 0) | |||
957 | DPRINTF("short write, filesystem full?"); | |||
958 | return BT_FAIL-1; | |||
959 | } | |||
960 | ||||
961 | if ((bt->size = lseek(bt->fd, 0, SEEK_END2)) == -1) { | |||
962 | DPRINTF("failed to update file size: %s", strerror(errno)); | |||
963 | bt->size = 0; | |||
964 | } | |||
965 | ||||
966 | return BT_SUCCESS0; | |||
967 | } | |||
968 | ||||
969 | /* Returns true if page p is a valid meta page, false otherwise. | |||
970 | */ | |||
971 | static int | |||
972 | btree_is_meta_page(struct page *p) | |||
973 | { | |||
974 | struct bt_meta *m; | |||
975 | unsigned char hash[SHA_DIGEST_LENGTH20]; | |||
976 | ||||
977 | m = METADATA(p)((void *)((char *)p + __builtin_offsetof(struct page, ptrs))); | |||
978 | if (!F_ISSET(p->flags, P_META)(((p->flags) & (0x08)) == (0x08))) { | |||
979 | DPRINTF("page %d not a meta page", p->pgno); | |||
980 | errno(*__errno()) = EINVAL22; | |||
981 | return 0; | |||
982 | } | |||
983 | ||||
984 | if (m->root >= p->pgno && m->root != P_INVALID0xFFFFFFFF) { | |||
985 | DPRINTF("page %d points to an invalid root page", p->pgno); | |||
986 | errno(*__errno()) = EINVAL22; | |||
987 | return 0; | |||
988 | } | |||
989 | ||||
990 | SHA1((unsigned char *)m, METAHASHLEN__builtin_offsetof(struct bt_meta, hash), hash); | |||
991 | if (bcmp(hash, m->hash, SHA_DIGEST_LENGTH20) != 0) { | |||
992 | DPRINTF("page %d has an invalid digest", p->pgno); | |||
993 | errno(*__errno()) = EINVAL22; | |||
994 | return 0; | |||
995 | } | |||
996 | ||||
997 | return 1; | |||
998 | } | |||
999 | ||||
1000 | static int | |||
1001 | btree_read_meta(struct btree *bt, pgno_t *p_next) | |||
1002 | { | |||
1003 | struct mpage *mp; | |||
1004 | struct bt_meta *meta; | |||
1005 | pgno_t meta_pgno, next_pgno; | |||
1006 | off_t size; | |||
1007 | ||||
1008 | assert(bt != NULL)((bt != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1008, __func__, "bt != NULL")); | |||
1009 | ||||
1010 | if ((size = lseek(bt->fd, 0, SEEK_END2)) == -1) | |||
1011 | goto fail; | |||
1012 | ||||
1013 | DPRINTF("btree_read_meta: size = %llu", size); | |||
1014 | ||||
1015 | if (size < bt->size) { | |||
1016 | DPRINTF("file has shrunk!"); | |||
1017 | errno(*__errno()) = EIO5; | |||
1018 | goto fail; | |||
1019 | } | |||
1020 | ||||
1021 | if (size == bt->head.psize) { /* there is only the header */ | |||
1022 | if (p_next != NULL((void *)0)) | |||
1023 | *p_next = 1; | |||
1024 | return BT_SUCCESS0; /* new file */ | |||
1025 | } | |||
1026 | ||||
1027 | next_pgno = size / bt->head.psize; | |||
| ||||
1028 | if (next_pgno == 0) { | |||
1029 | DPRINTF("corrupt file"); | |||
1030 | errno(*__errno()) = EIO5; | |||
1031 | goto fail; | |||
1032 | } | |||
1033 | ||||
1034 | meta_pgno = next_pgno - 1; | |||
1035 | ||||
1036 | if (size % bt->head.psize != 0) { | |||
1037 | DPRINTF("filesize not a multiple of the page size!"); | |||
1038 | bt->flags |= BT_FIXPADDING0x01; | |||
1039 | next_pgno++; | |||
1040 | } | |||
1041 | ||||
1042 | if (p_next != NULL((void *)0)) | |||
1043 | *p_next = next_pgno; | |||
1044 | ||||
1045 | if (size == bt->size) { | |||
1046 | DPRINTF("size unchanged, keeping current meta page"); | |||
1047 | if (F_ISSET(bt->meta.flags, BT_TOMBSTONE)(((bt->meta.flags) & (0x01)) == (0x01))) { | |||
1048 | DPRINTF("file is dead"); | |||
1049 | errno(*__errno()) = ESTALE70; | |||
1050 | return BT_FAIL-1; | |||
1051 | } else | |||
1052 | return BT_SUCCESS0; | |||
1053 | } | |||
1054 | bt->size = size; | |||
1055 | ||||
1056 | while (meta_pgno > 0) { | |||
1057 | if ((mp = btree_get_mpage(bt, meta_pgno)) == NULL((void *)0)) | |||
1058 | break; | |||
1059 | if (btree_is_meta_page(mp->page)) { | |||
1060 | meta = METADATA(mp->page)((void *)((char *)mp->page + __builtin_offsetof(struct page , ptrs))); | |||
1061 | DPRINTF("flags = 0x%x", meta->flags); | |||
1062 | if (F_ISSET(meta->flags, BT_TOMBSTONE)(((meta->flags) & (0x01)) == (0x01))) { | |||
1063 | DPRINTF("file is dead"); | |||
1064 | errno(*__errno()) = ESTALE70; | |||
1065 | return BT_FAIL-1; | |||
1066 | } else { | |||
1067 | /* Make copy of last meta page. */ | |||
1068 | bcopy(meta, &bt->meta, sizeof(bt->meta)); | |||
1069 | return BT_SUCCESS0; | |||
1070 | } | |||
1071 | } | |||
1072 | --meta_pgno; /* scan backwards to first valid meta page */ | |||
1073 | } | |||
1074 | ||||
1075 | errno(*__errno()) = EIO5; | |||
1076 | fail: | |||
1077 | if (p_next != NULL((void *)0)) | |||
1078 | *p_next = P_INVALID0xFFFFFFFF; | |||
1079 | return BT_FAIL-1; | |||
1080 | } | |||
1081 | ||||
1082 | struct btree * | |||
1083 | btree_open_fd(int fd, unsigned int flags) | |||
1084 | { | |||
1085 | struct btree *bt; | |||
1086 | int fl; | |||
1087 | ||||
1088 | fl = fcntl(fd, F_GETFL3); | |||
1089 | if (fcntl(fd, F_SETFL4, fl | O_APPEND0x0008) == -1) | |||
1090 | return NULL((void *)0); | |||
1091 | ||||
1092 | if ((bt = calloc(1, sizeof(*bt))) == NULL((void *)0)) | |||
1093 | return NULL((void *)0); | |||
1094 | bt->fd = fd; | |||
1095 | bt->flags = flags; | |||
1096 | bt->flags &= ~BT_FIXPADDING0x01; | |||
1097 | bt->ref = 1; | |||
1098 | bt->meta.root = P_INVALID0xFFFFFFFF; | |||
1099 | ||||
1100 | if ((bt->page_cache = calloc(1, sizeof(*bt->page_cache))) == NULL((void *)0)) | |||
1101 | goto fail; | |||
1102 | bt->stat.max_cache = BT_MAXCACHE_DEF1024; | |||
1103 | RB_INIT(bt->page_cache)do { (bt->page_cache)->rbh_root = ((void *)0); } while ( 0); | |||
1104 | ||||
1105 | if ((bt->lru_queue = calloc(1, sizeof(*bt->lru_queue))) == NULL((void *)0)) | |||
1106 | goto fail; | |||
1107 | TAILQ_INIT(bt->lru_queue)do { (bt->lru_queue)->tqh_first = ((void *)0); (bt-> lru_queue)->tqh_last = &(bt->lru_queue)->tqh_first ; } while (0); | |||
1108 | ||||
1109 | if (btree_read_header(bt) != 0) { | |||
1110 | if (errno(*__errno()) != ENOENT2) | |||
1111 | goto fail; | |||
1112 | DPRINTF("new database"); | |||
1113 | btree_write_header(bt, bt->fd); | |||
1114 | } | |||
1115 | ||||
1116 | if (btree_read_meta(bt, NULL((void *)0)) != 0) | |||
1117 | goto fail; | |||
1118 | ||||
1119 | DPRINTF("opened database version %u, pagesize %u", | |||
1120 | bt->head.version, bt->head.psize); | |||
1121 | DPRINTF("timestamp: %s", ctime(&bt->meta.created_at)); | |||
1122 | DPRINTF("depth: %u", bt->meta.depth); | |||
1123 | DPRINTF("entries: %llu", bt->meta.entries); | |||
1124 | DPRINTF("revisions: %u", bt->meta.revisions); | |||
1125 | DPRINTF("branch pages: %u", bt->meta.branch_pages); | |||
1126 | DPRINTF("leaf pages: %u", bt->meta.leaf_pages); | |||
1127 | DPRINTF("overflow pages: %u", bt->meta.overflow_pages); | |||
1128 | DPRINTF("root: %u", bt->meta.root); | |||
1129 | DPRINTF("previous meta page: %u", bt->meta.prev_meta); | |||
1130 | ||||
1131 | return bt; | |||
1132 | ||||
1133 | fail: | |||
1134 | free(bt->lru_queue); | |||
1135 | free(bt->page_cache); | |||
1136 | free(bt); | |||
1137 | return NULL((void *)0); | |||
1138 | } | |||
1139 | ||||
1140 | struct btree * | |||
1141 | btree_open(const char *path, unsigned int flags, mode_t mode) | |||
1142 | { | |||
1143 | int fd, oflags; | |||
1144 | struct btree *bt; | |||
1145 | ||||
1146 | if (F_ISSET(flags, BT_RDONLY)(((flags) & (0x04)) == (0x04))) | |||
| ||||
1147 | oflags = O_RDONLY0x0000; | |||
1148 | else | |||
1149 | oflags = O_RDWR0x0002 | O_CREAT0x0200 | O_APPEND0x0008; | |||
1150 | ||||
1151 | if ((fd = open(path, oflags, mode)) == -1) | |||
1152 | return NULL((void *)0); | |||
1153 | ||||
1154 | if ((bt = btree_open_fd(fd, flags)) == NULL((void *)0)) | |||
1155 | close(fd); | |||
1156 | else { | |||
1157 | bt->path = strdup(path); | |||
1158 | DPRINTF("opened btree %p", bt); | |||
1159 | } | |||
1160 | ||||
1161 | return bt; | |||
1162 | } | |||
1163 | ||||
1164 | static void | |||
1165 | btree_ref(struct btree *bt) | |||
1166 | { | |||
1167 | bt->ref++; | |||
1168 | DPRINTF("ref is now %d on btree %p", bt->ref, bt); | |||
1169 | } | |||
1170 | ||||
1171 | void | |||
1172 | btree_close(struct btree *bt) | |||
1173 | { | |||
1174 | if (bt == NULL((void *)0)) | |||
1175 | return; | |||
1176 | ||||
1177 | if (--bt->ref == 0) { | |||
1178 | DPRINTF("ref is zero, closing btree %p", bt); | |||
1179 | close(bt->fd); | |||
1180 | mpage_flush(bt); | |||
1181 | free(bt->lru_queue); | |||
1182 | free(bt->path); | |||
1183 | free(bt->page_cache); | |||
1184 | free(bt); | |||
1185 | } else | |||
1186 | DPRINTF("ref is now %d on btree %p", bt->ref, bt); | |||
1187 | } | |||
1188 | ||||
1189 | /* Search for key within a leaf page, using binary search. | |||
1190 | * Returns the smallest entry larger or equal to the key. | |||
1191 | * If exactp is non-null, stores whether the found entry was an exact match | |||
1192 | * in *exactp (1 or 0). | |||
1193 | * If kip is non-null, stores the index of the found entry in *kip. | |||
1194 | * If no entry larger of equal to the key is found, returns NULL. | |||
1195 | */ | |||
1196 | static struct node * | |||
1197 | btree_search_node(struct btree *bt, struct mpage *mp, struct btval *key, | |||
1198 | int *exactp, unsigned int *kip) | |||
1199 | { | |||
1200 | unsigned int i = 0; | |||
1201 | int low, high; | |||
1202 | int rc = 0; | |||
1203 | struct node *node; | |||
1204 | struct btval nodekey; | |||
1205 | ||||
1206 | DPRINTF("searching %lu keys in %s page %u with prefix [%.*s]", | |||
1207 | NUMKEYS(mp), | |||
1208 | IS_LEAF(mp) ? "leaf" : "branch", | |||
1209 | mp->pgno, (int)mp->prefix.len, (char *)mp->prefix.str); | |||
1210 | ||||
1211 | assert(NUMKEYS(mp) > 0)(((((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1211, __func__, "NUMKEYS(mp) > 0")); | |||
1212 | ||||
1213 | memset(&nodekey, 0, sizeof(nodekey)); | |||
1214 | ||||
1215 | low = IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) ? 0 : 1; | |||
1216 | high = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) - 1; | |||
1217 | while (low <= high) { | |||
1218 | i = (low + high) >> 1; | |||
1219 | node = NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[i])); | |||
1220 | ||||
1221 | nodekey.size = node->ksize; | |||
1222 | nodekey.data = NODEKEY(node)(void *)((node)->data); | |||
1223 | ||||
1224 | if (bt->cmp) | |||
1225 | rc = bt->cmp(key, &nodekey); | |||
1226 | else | |||
1227 | rc = bt_cmp(bt, key, &nodekey, &mp->prefix); | |||
1228 | ||||
1229 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) | |||
1230 | DPRINTF("found leaf index %u [%.*s], rc = %d", | |||
1231 | i, (int)nodekey.size, (char *)nodekey.data, rc); | |||
1232 | else | |||
1233 | DPRINTF("found branch index %u [%.*s -> %u], rc = %d", | |||
1234 | i, (int)node->ksize, (char *)NODEKEY(node), | |||
1235 | node->n_pgno, rc); | |||
1236 | ||||
1237 | if (rc == 0) | |||
1238 | break; | |||
1239 | if (rc > 0) | |||
1240 | low = i + 1; | |||
1241 | else | |||
1242 | high = i - 1; | |||
1243 | } | |||
1244 | ||||
1245 | if (rc > 0) { /* Found entry is less than the key. */ | |||
1246 | i++; /* Skip to get the smallest entry larger than key. */ | |||
1247 | if (i >= NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1)) | |||
1248 | /* There is no entry larger or equal to the key. */ | |||
1249 | return NULL((void *)0); | |||
1250 | } | |||
1251 | if (exactp) | |||
1252 | *exactp = (rc == 0); | |||
1253 | if (kip) /* Store the key index if requested. */ | |||
1254 | *kip = i; | |||
1255 | ||||
1256 | return NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[i])); | |||
1257 | } | |||
1258 | ||||
1259 | static void | |||
1260 | cursor_pop_page(struct cursor *cursor) | |||
1261 | { | |||
1262 | struct ppage *top; | |||
1263 | ||||
1264 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | |||
1265 | CURSOR_POP(cursor)do { (&(cursor)->stack)->slh_first = (&(cursor) ->stack)->slh_first->entry.sle_next; } while (0); | |||
1266 | top->mpage->ref--; | |||
1267 | ||||
1268 | DPRINTF("popped page %u off cursor %p", top->mpage->pgno, cursor); | |||
1269 | ||||
1270 | free(top); | |||
1271 | } | |||
1272 | ||||
1273 | static struct ppage * | |||
1274 | cursor_push_page(struct cursor *cursor, struct mpage *mp) | |||
1275 | { | |||
1276 | struct ppage *ppage; | |||
1277 | ||||
1278 | DPRINTF("pushing page %u on cursor %p", mp->pgno, cursor); | |||
1279 | ||||
1280 | if ((ppage = calloc(1, sizeof(*ppage))) == NULL((void *)0)) | |||
1281 | return NULL((void *)0); | |||
1282 | ppage->mpage = mp; | |||
1283 | mp->ref++; | |||
1284 | CURSOR_PUSH(cursor, ppage)do { (ppage)->entry.sle_next = (&(cursor)->stack)-> slh_first; (&(cursor)->stack)->slh_first = (ppage); } while (0); | |||
1285 | return ppage; | |||
1286 | } | |||
1287 | ||||
1288 | static struct mpage * | |||
1289 | btree_get_mpage(struct btree *bt, pgno_t pgno) | |||
1290 | { | |||
1291 | struct mpage *mp; | |||
1292 | ||||
1293 | mp = mpage_lookup(bt, pgno); | |||
1294 | if (mp == NULL((void *)0)) { | |||
1295 | if ((mp = calloc(1, sizeof(*mp))) == NULL((void *)0)) | |||
1296 | return NULL((void *)0); | |||
1297 | if ((mp->page = malloc(bt->head.psize)) == NULL((void *)0)) { | |||
1298 | free(mp); | |||
1299 | return NULL((void *)0); | |||
1300 | } | |||
1301 | if (btree_read_page(bt, pgno, mp->page) != BT_SUCCESS0) { | |||
1302 | mpage_free(mp); | |||
1303 | return NULL((void *)0); | |||
1304 | } | |||
1305 | mp->pgno = pgno; | |||
1306 | mpage_add(bt, mp); | |||
1307 | } else | |||
1308 | DPRINTF("returning page %u from cache", pgno); | |||
1309 | ||||
1310 | return mp; | |||
1311 | } | |||
1312 | ||||
1313 | static void | |||
1314 | concat_prefix(struct btree *bt, char *s1, size_t n1, char *s2, size_t n2, | |||
1315 | char *cs, size_t *cn) | |||
1316 | { | |||
1317 | assert(*cn >= n1 + n2)((*cn >= n1 + n2) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1317, __func__, "*cn >= n1 + n2")); | |||
1318 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) { | |||
1319 | bcopy(s2, cs, n2); | |||
1320 | bcopy(s1, cs + n2, n1); | |||
1321 | } else { | |||
1322 | bcopy(s1, cs, n1); | |||
1323 | bcopy(s2, cs + n1, n2); | |||
1324 | } | |||
1325 | *cn = n1 + n2; | |||
1326 | } | |||
1327 | ||||
1328 | static void | |||
1329 | find_common_prefix(struct btree *bt, struct mpage *mp) | |||
1330 | { | |||
1331 | indx_t lbound = 0, ubound = 0; | |||
1332 | struct mpage *lp, *up; | |||
1333 | struct btkey lprefix, uprefix; | |||
1334 | ||||
1335 | mp->prefix.len = 0; | |||
1336 | if (bt->cmp != NULL((void *)0)) | |||
1337 | return; | |||
1338 | ||||
1339 | lp = mp; | |||
1340 | while (lp->parent != NULL((void *)0)) { | |||
1341 | if (lp->parent_index > 0) { | |||
1342 | lbound = lp->parent_index; | |||
1343 | break; | |||
1344 | } | |||
1345 | lp = lp->parent; | |||
1346 | } | |||
1347 | ||||
1348 | up = mp; | |||
1349 | while (up->parent != NULL((void *)0)) { | |||
1350 | if (up->parent_index + 1 < (indx_t)NUMKEYS(up->parent)(((up->parent)->page->b.fb.fb_lower - __builtin_offsetof (struct page, ptrs)) >> 1)) { | |||
1351 | ubound = up->parent_index + 1; | |||
1352 | break; | |||
1353 | } | |||
1354 | up = up->parent; | |||
1355 | } | |||
1356 | ||||
1357 | if (lp->parent != NULL((void *)0) && up->parent != NULL((void *)0)) { | |||
1358 | expand_prefix(bt, lp->parent, lbound, &lprefix); | |||
1359 | expand_prefix(bt, up->parent, ubound, &uprefix); | |||
1360 | common_prefix(bt, &lprefix, &uprefix, &mp->prefix); | |||
1361 | } | |||
1362 | else if (mp->parent) | |||
1363 | bcopy(&mp->parent->prefix, &mp->prefix, sizeof(mp->prefix)); | |||
1364 | ||||
1365 | DPRINTF("found common prefix [%.*s] (len %zu) for page %u", | |||
1366 | (int)mp->prefix.len, mp->prefix.str, mp->prefix.len, mp->pgno); | |||
1367 | } | |||
1368 | ||||
1369 | static int | |||
1370 | btree_search_page_root(struct btree *bt, struct mpage *root, struct btval *key, | |||
1371 | struct cursor *cursor, int modify, struct mpage **mpp) | |||
1372 | { | |||
1373 | struct mpage *mp, *parent; | |||
1374 | ||||
1375 | if (cursor && cursor_push_page(cursor, root) == NULL((void *)0)) | |||
1376 | return BT_FAIL-1; | |||
1377 | ||||
1378 | mp = root; | |||
1379 | while (IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01))) { | |||
1380 | unsigned int i = 0; | |||
1381 | struct node *node; | |||
1382 | ||||
1383 | DPRINTF("branch page %u has %lu keys", mp->pgno, NUMKEYS(mp)); | |||
1384 | assert(NUMKEYS(mp) > 1)(((((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) > 1) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1384, __func__, "NUMKEYS(mp) > 1")); | |||
1385 | DPRINTF("found index 0 to page %u", NODEPGNO(NODEPTR(mp, 0))); | |||
1386 | ||||
1387 | if (key == NULL((void *)0)) /* Initialize cursor to first page. */ | |||
1388 | i = 0; | |||
1389 | else { | |||
1390 | int exact; | |||
1391 | node = btree_search_node(bt, mp, key, &exact, &i); | |||
1392 | if (node == NULL((void *)0)) | |||
1393 | i = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) - 1; | |||
1394 | else if (!exact) { | |||
1395 | assert(i > 0)((i > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1395, __func__, "i > 0")); | |||
1396 | i--; | |||
1397 | } | |||
1398 | } | |||
1399 | ||||
1400 | if (key) | |||
1401 | DPRINTF("following index %u for key %.*s", | |||
1402 | i, (int)key->size, (char *)key->data); | |||
1403 | assert(i >= 0 && i < NUMKEYS(mp))((i >= 0 && i < (((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1)) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1403, __func__ , "i >= 0 && i < NUMKEYS(mp)")); | |||
1404 | node = NODEPTR(mp, i)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[i])); | |||
1405 | ||||
1406 | if (cursor) | |||
1407 | CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first)->ki = i; | |||
1408 | ||||
1409 | parent = mp; | |||
1410 | if ((mp = btree_get_mpage(bt, NODEPGNO(node)((node)->p.np_pgno))) == NULL((void *)0)) | |||
1411 | return BT_FAIL-1; | |||
1412 | mp->parent = parent; | |||
1413 | mp->parent_index = i; | |||
1414 | find_common_prefix(bt, mp); | |||
1415 | ||||
1416 | if (cursor && cursor_push_page(cursor, mp) == NULL((void *)0)) | |||
1417 | return BT_FAIL-1; | |||
1418 | ||||
1419 | if (modify && (mp = mpage_touch(bt, mp)) == NULL((void *)0)) | |||
1420 | return BT_FAIL-1; | |||
1421 | } | |||
1422 | ||||
1423 | if (!IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | |||
1424 | DPRINTF("internal error, index points to a %02X page!?", | |||
1425 | mp->page->flags); | |||
1426 | return BT_FAIL-1; | |||
1427 | } | |||
1428 | ||||
1429 | DPRINTF("found leaf page %u for key %.*s", mp->pgno, | |||
1430 | key ? (int)key->size : 0, key ? (char *)key->data : NULL); | |||
1431 | ||||
1432 | *mpp = mp; | |||
1433 | return BT_SUCCESS0; | |||
1434 | } | |||
1435 | ||||
1436 | /* Search for the page a given key should be in. | |||
1437 | * Stores a pointer to the found page in *mpp. | |||
1438 | * If key is NULL, search for the lowest page (used by btree_cursor_first). | |||
1439 | * If cursor is non-null, pushes parent pages on the cursor stack. | |||
1440 | * If modify is true, visited pages are updated with new page numbers. | |||
1441 | */ | |||
1442 | static int | |||
1443 | btree_search_page(struct btree *bt, struct btree_txn *txn, struct btval *key, | |||
1444 | struct cursor *cursor, int modify, struct mpage **mpp) | |||
1445 | { | |||
1446 | int rc; | |||
1447 | pgno_t root; | |||
1448 | struct mpage *mp; | |||
1449 | ||||
1450 | /* Can't modify pages outside a transaction. */ | |||
1451 | if (txn == NULL((void *)0) && modify) { | |||
1452 | errno(*__errno()) = EINVAL22; | |||
1453 | return BT_FAIL-1; | |||
1454 | } | |||
1455 | ||||
1456 | /* Choose which root page to start with. If a transaction is given | |||
1457 | * use the root page from the transaction, otherwise read the last | |||
1458 | * committed root page. | |||
1459 | */ | |||
1460 | if (txn == NULL((void *)0)) { | |||
1461 | if ((rc = btree_read_meta(bt, NULL((void *)0))) != BT_SUCCESS0) | |||
1462 | return rc; | |||
1463 | root = bt->meta.root; | |||
1464 | } else if (F_ISSET(txn->flags, BT_TXN_ERROR)(((txn->flags) & (0x02)) == (0x02))) { | |||
1465 | DPRINTF("transaction has failed, must abort"); | |||
1466 | errno(*__errno()) = EINVAL22; | |||
1467 | return BT_FAIL-1; | |||
1468 | } else | |||
1469 | root = txn->root; | |||
1470 | ||||
1471 | if (root == P_INVALID0xFFFFFFFF) { /* Tree is empty. */ | |||
1472 | DPRINTF("tree is empty"); | |||
1473 | errno(*__errno()) = ENOENT2; | |||
1474 | return BT_FAIL-1; | |||
1475 | } | |||
1476 | ||||
1477 | if ((mp = btree_get_mpage(bt, root)) == NULL((void *)0)) | |||
1478 | return BT_FAIL-1; | |||
1479 | ||||
1480 | DPRINTF("root page has flags 0x%X", mp->page->flags); | |||
1481 | ||||
1482 | assert(mp->parent == NULL)((mp->parent == ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1482, __func__, "mp->parent == NULL")); | |||
1483 | assert(mp->prefix.len == 0)((mp->prefix.len == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1483, __func__, "mp->prefix.len == 0")); | |||
1484 | ||||
1485 | if (modify && !mp->dirty) { | |||
1486 | if ((mp = mpage_touch(bt, mp)) == NULL((void *)0)) | |||
1487 | return BT_FAIL-1; | |||
1488 | txn->root = mp->pgno; | |||
1489 | } | |||
1490 | ||||
1491 | return btree_search_page_root(bt, mp, key, cursor, modify, mpp); | |||
1492 | } | |||
1493 | ||||
1494 | static int | |||
1495 | btree_read_data(struct btree *bt, struct mpage *mp, struct node *leaf, | |||
1496 | struct btval *data) | |||
1497 | { | |||
1498 | struct mpage *omp; /* overflow mpage */ | |||
1499 | size_t psz; | |||
1500 | size_t max; | |||
1501 | size_t sz = 0; | |||
1502 | pgno_t pgno; | |||
1503 | ||||
1504 | memset(data, 0, sizeof(*data)); | |||
1505 | max = bt->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs); | |||
1506 | ||||
1507 | if (!F_ISSET(leaf->flags, F_BIGDATA)(((leaf->flags) & (0x01)) == (0x01))) { | |||
1508 | data->size = leaf->n_dsizep.np_dsize; | |||
1509 | if (data->size > 0) { | |||
1510 | if (mp == NULL((void *)0)) { | |||
1511 | if ((data->data = malloc(data->size)) == NULL((void *)0)) | |||
1512 | return BT_FAIL-1; | |||
1513 | bcopy(NODEDATA(leaf)(void *)((char *)(leaf)->data + (leaf)->ksize), data->data, data->size); | |||
1514 | data->free_data = 1; | |||
1515 | data->mp = NULL((void *)0); | |||
1516 | } else { | |||
1517 | data->data = NODEDATA(leaf)(void *)((char *)(leaf)->data + (leaf)->ksize); | |||
1518 | data->free_data = 0; | |||
1519 | data->mp = mp; | |||
1520 | mp->ref++; | |||
1521 | } | |||
1522 | } | |||
1523 | return BT_SUCCESS0; | |||
1524 | } | |||
1525 | ||||
1526 | /* Read overflow data. | |||
1527 | */ | |||
1528 | DPRINTF("allocating %u byte for overflow data", leaf->n_dsize); | |||
1529 | if ((data->data = malloc(leaf->n_dsizep.np_dsize)) == NULL((void *)0)) | |||
1530 | return BT_FAIL-1; | |||
1531 | data->size = leaf->n_dsizep.np_dsize; | |||
1532 | data->free_data = 1; | |||
1533 | data->mp = NULL((void *)0); | |||
1534 | bcopy(NODEDATA(leaf)(void *)((char *)(leaf)->data + (leaf)->ksize), &pgno, sizeof(pgno)); | |||
1535 | for (sz = 0; sz < data->size; ) { | |||
1536 | if ((omp = btree_get_mpage(bt, pgno)) == NULL((void *)0) || | |||
1537 | !F_ISSET(omp->page->flags, P_OVERFLOW)(((omp->page->flags) & (0x04)) == (0x04))) { | |||
1538 | DPRINTF("read overflow page %u failed", pgno); | |||
1539 | free(data->data); | |||
1540 | mpage_free(omp); | |||
1541 | return BT_FAIL-1; | |||
1542 | } | |||
1543 | psz = data->size - sz; | |||
1544 | if (psz > max) | |||
1545 | psz = max; | |||
1546 | bcopy(omp->page->ptrs, (char *)data->data + sz, psz); | |||
1547 | sz += psz; | |||
1548 | pgno = omp->page->p_next_pgnob.pb_next_pgno; | |||
1549 | } | |||
1550 | ||||
1551 | return BT_SUCCESS0; | |||
1552 | } | |||
1553 | ||||
1554 | int | |||
1555 | btree_txn_get(struct btree *bt, struct btree_txn *txn, | |||
1556 | struct btval *key, struct btval *data) | |||
1557 | { | |||
1558 | int rc, exact; | |||
1559 | struct node *leaf; | |||
1560 | struct mpage *mp; | |||
1561 | ||||
1562 | assert(key)((key) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1562, __func__, "key")); | |||
1563 | assert(data)((data) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1563, __func__, "data")); | |||
1564 | DPRINTF("===> get key [%.*s]", (int)key->size, (char *)key->data); | |||
1565 | ||||
1566 | if (bt != NULL((void *)0) && txn != NULL((void *)0) && bt != txn->bt) { | |||
1567 | errno(*__errno()) = EINVAL22; | |||
1568 | return BT_FAIL-1; | |||
1569 | } | |||
1570 | ||||
1571 | if (bt == NULL((void *)0)) { | |||
1572 | if (txn == NULL((void *)0)) { | |||
1573 | errno(*__errno()) = EINVAL22; | |||
1574 | return BT_FAIL-1; | |||
1575 | } | |||
1576 | bt = txn->bt; | |||
1577 | } | |||
1578 | ||||
1579 | if (key->size == 0 || key->size > MAXKEYSIZE255) { | |||
1580 | errno(*__errno()) = EINVAL22; | |||
1581 | return BT_FAIL-1; | |||
1582 | } | |||
1583 | ||||
1584 | if ((rc = btree_search_page(bt, txn, key, NULL((void *)0), 0, &mp)) != BT_SUCCESS0) | |||
1585 | return rc; | |||
1586 | ||||
1587 | leaf = btree_search_node(bt, mp, key, &exact, NULL((void *)0)); | |||
1588 | if (leaf && exact) | |||
1589 | rc = btree_read_data(bt, mp, leaf, data); | |||
1590 | else { | |||
1591 | errno(*__errno()) = ENOENT2; | |||
1592 | rc = BT_FAIL-1; | |||
1593 | } | |||
1594 | ||||
1595 | mpage_prune(bt); | |||
1596 | return rc; | |||
1597 | } | |||
1598 | ||||
1599 | static int | |||
1600 | btree_sibling(struct cursor *cursor, int move_right) | |||
1601 | { | |||
1602 | int rc; | |||
1603 | struct node *indx; | |||
1604 | struct ppage *parent, *top; | |||
1605 | struct mpage *mp; | |||
1606 | ||||
1607 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | |||
1608 | if ((parent = SLIST_NEXT(top, entry)((top)->entry.sle_next)) == NULL((void *)0)) { | |||
1609 | errno(*__errno()) = ENOENT2; | |||
1610 | return BT_FAIL-1; /* root has no siblings */ | |||
1611 | } | |||
1612 | ||||
1613 | DPRINTF("parent page is page %u, index %u", | |||
1614 | parent->mpage->pgno, parent->ki); | |||
1615 | ||||
1616 | cursor_pop_page(cursor); | |||
1617 | if (move_right ? (parent->ki + 1 >= NUMKEYS(parent->mpage)(((parent->mpage)->page->b.fb.fb_lower - __builtin_offsetof (struct page, ptrs)) >> 1)) | |||
1618 | : (parent->ki == 0)) { | |||
1619 | DPRINTF("no more keys left, moving to %s sibling", | |||
1620 | move_right ? "right" : "left"); | |||
1621 | if ((rc = btree_sibling(cursor, move_right)) != BT_SUCCESS0) | |||
1622 | return rc; | |||
1623 | parent = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | |||
1624 | } else { | |||
1625 | if (move_right) | |||
1626 | parent->ki++; | |||
1627 | else | |||
1628 | parent->ki--; | |||
1629 | DPRINTF("just moving to %s index key %u", | |||
1630 | move_right ? "right" : "left", parent->ki); | |||
1631 | } | |||
1632 | assert(IS_BRANCH(parent->mpage))((((((parent->mpage)->page->flags) & (0x01)) == ( 0x01))) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1632, __func__, "IS_BRANCH(parent->mpage)")); | |||
1633 | ||||
1634 | indx = NODEPTR(parent->mpage, parent->ki)((struct node *)((char *)((parent->mpage)->page) + ((parent ->mpage)->page)->ptrs[parent->ki])); | |||
1635 | if ((mp = btree_get_mpage(cursor->bt, indx->n_pgnop.np_pgno)) == NULL((void *)0)) | |||
1636 | return BT_FAIL-1; | |||
1637 | mp->parent = parent->mpage; | |||
1638 | mp->parent_index = parent->ki; | |||
1639 | ||||
1640 | cursor_push_page(cursor, mp); | |||
1641 | find_common_prefix(cursor->bt, mp); | |||
1642 | ||||
1643 | return BT_SUCCESS0; | |||
1644 | } | |||
1645 | ||||
1646 | static int | |||
1647 | bt_set_key(struct btree *bt, struct mpage *mp, struct node *node, | |||
1648 | struct btval *key) | |||
1649 | { | |||
1650 | if (key == NULL((void *)0)) | |||
1651 | return 0; | |||
1652 | ||||
1653 | if (mp->prefix.len > 0) { | |||
1654 | key->size = node->ksize + mp->prefix.len; | |||
1655 | key->data = malloc(key->size); | |||
1656 | if (key->data == NULL((void *)0)) | |||
1657 | return -1; | |||
1658 | concat_prefix(bt, | |||
1659 | mp->prefix.str, mp->prefix.len, | |||
1660 | NODEKEY(node)(void *)((node)->data), node->ksize, | |||
1661 | key->data, &key->size); | |||
1662 | key->free_data = 1; | |||
1663 | } else { | |||
1664 | key->size = node->ksize; | |||
1665 | key->data = NODEKEY(node)(void *)((node)->data); | |||
1666 | key->free_data = 0; | |||
1667 | key->mp = mp; | |||
1668 | mp->ref++; | |||
1669 | } | |||
1670 | ||||
1671 | return 0; | |||
1672 | } | |||
1673 | ||||
1674 | static int | |||
1675 | btree_cursor_next(struct cursor *cursor, struct btval *key, struct btval *data) | |||
1676 | { | |||
1677 | struct ppage *top; | |||
1678 | struct mpage *mp; | |||
1679 | struct node *leaf; | |||
1680 | ||||
1681 | if (cursor->eof) { | |||
1682 | errno(*__errno()) = ENOENT2; | |||
1683 | return BT_FAIL-1; | |||
1684 | } | |||
1685 | ||||
1686 | assert(cursor->initialized)((cursor->initialized) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1686, __func__, "cursor->initialized")); | |||
1687 | ||||
1688 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | |||
1689 | mp = top->mpage; | |||
1690 | ||||
1691 | DPRINTF("cursor_next: top page is %u in cursor %p", mp->pgno, cursor); | |||
1692 | ||||
1693 | if (top->ki + 1 >= NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1)) { | |||
1694 | DPRINTF("=====> move to next sibling page"); | |||
1695 | if (btree_sibling(cursor, 1) != BT_SUCCESS0) { | |||
1696 | cursor->eof = 1; | |||
1697 | return BT_FAIL-1; | |||
1698 | } | |||
1699 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | |||
1700 | mp = top->mpage; | |||
1701 | DPRINTF("next page is %u, key index %u", mp->pgno, top->ki); | |||
1702 | } else | |||
1703 | top->ki++; | |||
1704 | ||||
1705 | DPRINTF("==> cursor points to page %u with %lu keys, key index %u", | |||
1706 | mp->pgno, NUMKEYS(mp), top->ki); | |||
1707 | ||||
1708 | assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1708, __func__ , "IS_LEAF(mp)")); | |||
1709 | leaf = NODEPTR(mp, top->ki)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[top->ki])); | |||
1710 | ||||
1711 | if (data && btree_read_data(cursor->bt, mp, leaf, data) != BT_SUCCESS0) | |||
1712 | return BT_FAIL-1; | |||
1713 | ||||
1714 | if (bt_set_key(cursor->bt, mp, leaf, key) != 0) | |||
1715 | return BT_FAIL-1; | |||
1716 | ||||
1717 | return BT_SUCCESS0; | |||
1718 | } | |||
1719 | ||||
1720 | static int | |||
1721 | btree_cursor_set(struct cursor *cursor, struct btval *key, struct btval *data, | |||
1722 | int *exactp) | |||
1723 | { | |||
1724 | int rc; | |||
1725 | struct node *leaf; | |||
1726 | struct mpage *mp; | |||
1727 | struct ppage *top; | |||
1728 | ||||
1729 | assert(cursor)((cursor) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1729, __func__, "cursor")); | |||
1730 | assert(key)((key) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1730, __func__, "key")); | |||
1731 | assert(key->size > 0)((key->size > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1731, __func__, "key->size > 0")); | |||
1732 | ||||
1733 | rc = btree_search_page(cursor->bt, cursor->txn, key, cursor, 0, &mp); | |||
1734 | if (rc != BT_SUCCESS0) | |||
1735 | return rc; | |||
1736 | assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1736, __func__ , "IS_LEAF(mp)")); | |||
1737 | ||||
1738 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | |||
1739 | leaf = btree_search_node(cursor->bt, mp, key, exactp, &top->ki); | |||
1740 | if (exactp != NULL((void *)0) && !*exactp) { | |||
1741 | /* BT_CURSOR_EXACT specified and not an exact match. */ | |||
1742 | errno(*__errno()) = ENOENT2; | |||
1743 | return BT_FAIL-1; | |||
1744 | } | |||
1745 | ||||
1746 | if (leaf == NULL((void *)0)) { | |||
1747 | DPRINTF("===> inexact leaf not found, goto sibling"); | |||
1748 | if (btree_sibling(cursor, 1) != BT_SUCCESS0) | |||
1749 | return BT_FAIL-1; /* no entries matched */ | |||
1750 | top = CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first); | |||
1751 | top->ki = 0; | |||
1752 | mp = top->mpage; | |||
1753 | assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1753, __func__ , "IS_LEAF(mp)")); | |||
1754 | leaf = NODEPTR(mp, 0)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[0])); | |||
1755 | } | |||
1756 | ||||
1757 | cursor->initialized = 1; | |||
1758 | cursor->eof = 0; | |||
1759 | ||||
1760 | if (data && btree_read_data(cursor->bt, mp, leaf, data) != BT_SUCCESS0) | |||
1761 | return BT_FAIL-1; | |||
1762 | ||||
1763 | if (bt_set_key(cursor->bt, mp, leaf, key) != 0) | |||
1764 | return BT_FAIL-1; | |||
1765 | DPRINTF("==> cursor placed on key %.*s", | |||
1766 | (int)key->size, (char *)key->data); | |||
1767 | ||||
1768 | return BT_SUCCESS0; | |||
1769 | } | |||
1770 | ||||
1771 | static int | |||
1772 | btree_cursor_first(struct cursor *cursor, struct btval *key, struct btval *data) | |||
1773 | { | |||
1774 | int rc; | |||
1775 | struct mpage *mp; | |||
1776 | struct node *leaf; | |||
1777 | ||||
1778 | rc = btree_search_page(cursor->bt, cursor->txn, NULL((void *)0), cursor, 0, &mp); | |||
1779 | if (rc != BT_SUCCESS0) | |||
1780 | return rc; | |||
1781 | assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1781, __func__ , "IS_LEAF(mp)")); | |||
1782 | ||||
1783 | leaf = NODEPTR(mp, 0)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[0])); | |||
1784 | cursor->initialized = 1; | |||
1785 | cursor->eof = 0; | |||
1786 | ||||
1787 | if (data && btree_read_data(cursor->bt, mp, leaf, data) != BT_SUCCESS0) | |||
1788 | return BT_FAIL-1; | |||
1789 | ||||
1790 | if (bt_set_key(cursor->bt, mp, leaf, key) != 0) | |||
1791 | return BT_FAIL-1; | |||
1792 | ||||
1793 | return BT_SUCCESS0; | |||
1794 | } | |||
1795 | ||||
1796 | int | |||
1797 | btree_cursor_get(struct cursor *cursor, struct btval *key, struct btval *data, | |||
1798 | enum cursor_op op) | |||
1799 | { | |||
1800 | int rc; | |||
1801 | int exact = 0; | |||
1802 | ||||
1803 | assert(cursor)((cursor) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1803, __func__, "cursor")); | |||
1804 | ||||
1805 | switch (op) { | |||
1806 | case BT_CURSOR: | |||
1807 | case BT_CURSOR_EXACT: | |||
1808 | while (CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first) != NULL((void *)0)) | |||
1809 | cursor_pop_page(cursor); | |||
1810 | if (key == NULL((void *)0) || key->size == 0 || key->size > MAXKEYSIZE255) { | |||
1811 | errno(*__errno()) = EINVAL22; | |||
1812 | rc = BT_FAIL-1; | |||
1813 | } else if (op == BT_CURSOR_EXACT) | |||
1814 | rc = btree_cursor_set(cursor, key, data, &exact); | |||
1815 | else | |||
1816 | rc = btree_cursor_set(cursor, key, data, NULL((void *)0)); | |||
1817 | break; | |||
1818 | case BT_NEXT: | |||
1819 | if (!cursor->initialized) | |||
1820 | rc = btree_cursor_first(cursor, key, data); | |||
1821 | else | |||
1822 | rc = btree_cursor_next(cursor, key, data); | |||
1823 | break; | |||
1824 | case BT_FIRST: | |||
1825 | while (CURSOR_TOP(cursor)((&(cursor)->stack)->slh_first) != NULL((void *)0)) | |||
1826 | cursor_pop_page(cursor); | |||
1827 | rc = btree_cursor_first(cursor, key, data); | |||
1828 | break; | |||
1829 | default: | |||
1830 | DPRINTF("unhandled/unimplemented cursor operation %u", op); | |||
1831 | rc = BT_FAIL-1; | |||
1832 | break; | |||
1833 | } | |||
1834 | ||||
1835 | mpage_prune(cursor->bt); | |||
1836 | ||||
1837 | return rc; | |||
1838 | } | |||
1839 | ||||
1840 | static struct mpage * | |||
1841 | btree_new_page(struct btree *bt, uint32_t flags) | |||
1842 | { | |||
1843 | struct mpage *mp; | |||
1844 | ||||
1845 | assert(bt != NULL)((bt != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1845, __func__, "bt != NULL")); | |||
1846 | assert(bt->txn != NULL)((bt->txn != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1846, __func__, "bt->txn != NULL")); | |||
1847 | ||||
1848 | DPRINTF("allocating new mpage %u, page size %u", | |||
1849 | bt->txn->next_pgno, bt->head.psize); | |||
1850 | if ((mp = calloc(1, sizeof(*mp))) == NULL((void *)0)) | |||
1851 | return NULL((void *)0); | |||
1852 | if ((mp->page = malloc(bt->head.psize)) == NULL((void *)0)) { | |||
1853 | free(mp); | |||
1854 | return NULL((void *)0); | |||
1855 | } | |||
1856 | mp->pgno = mp->page->pgno = bt->txn->next_pgno++; | |||
1857 | mp->page->flags = flags; | |||
1858 | mp->page->lowerb.fb.fb_lower = PAGEHDRSZ__builtin_offsetof(struct page, ptrs); | |||
1859 | mp->page->upperb.fb.fb_upper = bt->head.psize; | |||
1860 | ||||
1861 | if (IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01))) | |||
1862 | bt->meta.branch_pages++; | |||
1863 | else if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) | |||
1864 | bt->meta.leaf_pages++; | |||
1865 | else if (IS_OVERFLOW(mp)((((mp)->page->flags) & (0x04)) == (0x04))) | |||
1866 | bt->meta.overflow_pages++; | |||
1867 | ||||
1868 | mpage_add(bt, mp); | |||
1869 | mpage_dirty(bt, mp); | |||
1870 | ||||
1871 | return mp; | |||
1872 | } | |||
1873 | ||||
1874 | static size_t | |||
1875 | bt_leaf_size(struct btree *bt, struct btval *key, struct btval *data) | |||
1876 | { | |||
1877 | size_t sz; | |||
1878 | ||||
1879 | sz = LEAFSIZE(key, data)(__builtin_offsetof(struct node, data) + (key)->size + (data )->size); | |||
1880 | if (data->size >= bt->head.psize / BT_MINKEYS4) { | |||
1881 | /* put on overflow page */ | |||
1882 | sz -= data->size - sizeof(pgno_t); | |||
1883 | } | |||
1884 | ||||
1885 | return sz + sizeof(indx_t); | |||
1886 | } | |||
1887 | ||||
1888 | static size_t | |||
1889 | bt_branch_size(struct btree *bt, struct btval *key) | |||
1890 | { | |||
1891 | size_t sz; | |||
1892 | ||||
1893 | sz = INDXSIZE(key)(__builtin_offsetof(struct node, data) + ((key) == ((void *)0 ) ? 0 : (key)->size)); | |||
1894 | if (sz >= bt->head.psize / BT_MINKEYS4) { | |||
1895 | /* put on overflow page */ | |||
1896 | /* not implemented */ | |||
1897 | /* sz -= key->size - sizeof(pgno_t); */ | |||
1898 | } | |||
1899 | ||||
1900 | return sz + sizeof(indx_t); | |||
1901 | } | |||
1902 | ||||
1903 | static int | |||
1904 | btree_write_overflow_data(struct btree *bt, struct page *p, struct btval *data) | |||
1905 | { | |||
1906 | size_t done = 0; | |||
1907 | size_t sz; | |||
1908 | size_t max; | |||
1909 | pgno_t *linkp; /* linked page stored here */ | |||
1910 | struct mpage *next = NULL((void *)0); | |||
1911 | ||||
1912 | max = bt->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs); | |||
1913 | ||||
1914 | while (done < data->size) { | |||
1915 | if (next != NULL((void *)0)) | |||
1916 | p = next->page; | |||
1917 | linkp = &p->p_next_pgnob.pb_next_pgno; | |||
1918 | if (data->size - done > max) { | |||
1919 | /* need another overflow page */ | |||
1920 | if ((next = btree_new_page(bt, P_OVERFLOW0x04)) == NULL((void *)0)) | |||
1921 | return BT_FAIL-1; | |||
1922 | *linkp = next->pgno; | |||
1923 | DPRINTF("linking overflow page %u", next->pgno); | |||
1924 | } else | |||
1925 | *linkp = 0; /* indicates end of list */ | |||
1926 | sz = data->size - done; | |||
1927 | if (sz > max) | |||
1928 | sz = max; | |||
1929 | DPRINTF("copying %zu bytes to overflow page %u", sz, p->pgno); | |||
1930 | bcopy((char *)data->data + done, p->ptrs, sz); | |||
1931 | done += sz; | |||
1932 | } | |||
1933 | ||||
1934 | return BT_SUCCESS0; | |||
1935 | } | |||
1936 | ||||
1937 | /* Key prefix should already be stripped. | |||
1938 | */ | |||
1939 | static int | |||
1940 | btree_add_node(struct btree *bt, struct mpage *mp, indx_t indx, | |||
1941 | struct btval *key, struct btval *data, pgno_t pgno, uint8_t flags) | |||
1942 | { | |||
1943 | unsigned int i; | |||
1944 | size_t node_size = NODESIZE__builtin_offsetof(struct node, data); | |||
1945 | indx_t ofs; | |||
1946 | struct node *node; | |||
1947 | struct page *p; | |||
1948 | struct mpage *ofp = NULL((void *)0); /* overflow page */ | |||
1949 | ||||
1950 | p = mp->page; | |||
1951 | assert(p->upper >= p->lower)((p->b.fb.fb_upper >= p->b.fb.fb_lower) ? (void)0 : __assert2 ("/usr/src/usr.sbin/ldapd/btree.c", 1951, __func__, "p->upper >= p->lower" )); | |||
1952 | ||||
1953 | DPRINTF("add node [%.*s] to %s page %u at index %d, key size %zu", | |||
1954 | key ? (int)key->size : 0, key ? (char *)key->data : NULL, | |||
1955 | IS_LEAF(mp) ? "leaf" : "branch", | |||
1956 | mp->pgno, indx, key ? key->size : 0); | |||
1957 | ||||
1958 | if (key != NULL((void *)0)) | |||
1959 | node_size += key->size; | |||
1960 | ||||
1961 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | |||
1962 | assert(data)((data) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 1962, __func__, "data")); | |||
1963 | node_size += data->size; | |||
1964 | if (F_ISSET(flags, F_BIGDATA)(((flags) & (0x01)) == (0x01))) { | |||
1965 | /* Data already on overflow page. */ | |||
1966 | node_size -= data->size - sizeof(pgno_t); | |||
1967 | } else if (data->size >= bt->head.psize / BT_MINKEYS4) { | |||
1968 | /* Put data on overflow page. */ | |||
1969 | DPRINTF("data size is %zu, put on overflow page", | |||
1970 | data->size); | |||
1971 | node_size -= data->size - sizeof(pgno_t); | |||
1972 | if ((ofp = btree_new_page(bt, P_OVERFLOW0x04)) == NULL((void *)0)) | |||
1973 | return BT_FAIL-1; | |||
1974 | DPRINTF("allocated overflow page %u", ofp->pgno); | |||
1975 | flags |= F_BIGDATA0x01; | |||
1976 | } | |||
1977 | } | |||
1978 | ||||
1979 | if (node_size + sizeof(indx_t) > SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page-> b.fb.fb_lower)) { | |||
1980 | DPRINTF("not enough room in page %u, got %lu ptrs", | |||
1981 | mp->pgno, NUMKEYS(mp)); | |||
1982 | DPRINTF("upper - lower = %u - %u = %u", p->upper, p->lower, | |||
1983 | p->upper - p->lower); | |||
1984 | DPRINTF("node size = %zu", node_size); | |||
1985 | return BT_FAIL-1; | |||
1986 | } | |||
1987 | ||||
1988 | /* Move higher pointers up one slot. */ | |||
1989 | for (i = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); i > indx; i--) | |||
1990 | p->ptrs[i] = p->ptrs[i - 1]; | |||
1991 | ||||
1992 | /* Adjust free space offsets. */ | |||
1993 | ofs = p->upperb.fb.fb_upper - node_size; | |||
1994 | assert(ofs >= p->lower + sizeof(indx_t))((ofs >= p->b.fb.fb_lower + sizeof(indx_t)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 1994, __func__, "ofs >= p->lower + sizeof(indx_t)")); | |||
1995 | p->ptrs[indx] = ofs; | |||
1996 | p->upperb.fb.fb_upper = ofs; | |||
1997 | p->lowerb.fb.fb_lower += sizeof(indx_t); | |||
1998 | ||||
1999 | /* Write the node data. */ | |||
2000 | node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[indx])); | |||
2001 | node->ksize = (key == NULL((void *)0)) ? 0 : key->size; | |||
2002 | node->flags = flags; | |||
2003 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) | |||
2004 | node->n_dsizep.np_dsize = data->size; | |||
2005 | else | |||
2006 | node->n_pgnop.np_pgno = pgno; | |||
2007 | ||||
2008 | if (key) | |||
2009 | bcopy(key->data, NODEKEY(node)(void *)((node)->data), key->size); | |||
2010 | ||||
2011 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | |||
2012 | assert(key)((key) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2012, __func__, "key")); | |||
2013 | if (ofp == NULL((void *)0)) { | |||
2014 | if (F_ISSET(flags, F_BIGDATA)(((flags) & (0x01)) == (0x01))) | |||
2015 | bcopy(data->data, node->data + key->size, | |||
2016 | sizeof(pgno_t)); | |||
2017 | else | |||
2018 | bcopy(data->data, node->data + key->size, | |||
2019 | data->size); | |||
2020 | } else { | |||
2021 | bcopy(&ofp->pgno, node->data + key->size, | |||
2022 | sizeof(pgno_t)); | |||
2023 | if (btree_write_overflow_data(bt, ofp->page, | |||
2024 | data) == BT_FAIL-1) | |||
2025 | return BT_FAIL-1; | |||
2026 | } | |||
2027 | } | |||
2028 | ||||
2029 | return BT_SUCCESS0; | |||
2030 | } | |||
2031 | ||||
2032 | static void | |||
2033 | btree_del_node(struct btree *bt, struct mpage *mp, indx_t indx) | |||
2034 | { | |||
2035 | unsigned int sz; | |||
2036 | indx_t i, j, numkeys, ptr; | |||
2037 | struct node *node; | |||
2038 | char *base; | |||
2039 | ||||
2040 | DPRINTF("delete node %u on %s page %u", indx, | |||
2041 | IS_LEAF(mp) ? "leaf" : "branch", mp->pgno); | |||
2042 | assert(indx < NUMKEYS(mp))((indx < (((mp)->page->b.fb.fb_lower - __builtin_offsetof (struct page, ptrs)) >> 1)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2042, __func__, "indx < NUMKEYS(mp)")); | |||
2043 | ||||
2044 | node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[indx])); | |||
2045 | sz = NODESIZE__builtin_offsetof(struct node, data) + node->ksize; | |||
2046 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | |||
2047 | if (F_ISSET(node->flags, F_BIGDATA)(((node->flags) & (0x01)) == (0x01))) | |||
2048 | sz += sizeof(pgno_t); | |||
2049 | else | |||
2050 | sz += NODEDSZ(node)((node)->p.np_dsize); | |||
2051 | } | |||
2052 | ||||
2053 | ptr = mp->page->ptrs[indx]; | |||
2054 | numkeys = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); | |||
2055 | for (i = j = 0; i < numkeys; i++) { | |||
2056 | if (i != indx) { | |||
2057 | mp->page->ptrs[j] = mp->page->ptrs[i]; | |||
2058 | if (mp->page->ptrs[i] < ptr) | |||
2059 | mp->page->ptrs[j] += sz; | |||
2060 | j++; | |||
2061 | } | |||
2062 | } | |||
2063 | ||||
2064 | base = (char *)mp->page + mp->page->upperb.fb.fb_upper; | |||
2065 | bcopy(base, base + sz, ptr - mp->page->upperb.fb.fb_upper); | |||
2066 | ||||
2067 | mp->page->lowerb.fb.fb_lower -= sizeof(indx_t); | |||
2068 | mp->page->upperb.fb.fb_upper += sz; | |||
2069 | } | |||
2070 | ||||
2071 | struct cursor * | |||
2072 | btree_txn_cursor_open(struct btree *bt, struct btree_txn *txn) | |||
2073 | { | |||
2074 | struct cursor *cursor; | |||
2075 | ||||
2076 | if (bt != NULL((void *)0) && txn != NULL((void *)0) && bt != txn->bt) { | |||
2077 | errno(*__errno()) = EINVAL22; | |||
2078 | return NULL((void *)0); | |||
2079 | } | |||
2080 | ||||
2081 | if (bt == NULL((void *)0)) { | |||
2082 | if (txn == NULL((void *)0)) { | |||
2083 | errno(*__errno()) = EINVAL22; | |||
2084 | return NULL((void *)0); | |||
2085 | } | |||
2086 | bt = txn->bt; | |||
2087 | } | |||
2088 | ||||
2089 | if ((cursor = calloc(1, sizeof(*cursor))) != NULL((void *)0)) { | |||
2090 | SLIST_INIT(&cursor->stack){ ((&cursor->stack)->slh_first) = ((void *)0); }; | |||
2091 | cursor->bt = bt; | |||
2092 | cursor->txn = txn; | |||
2093 | btree_ref(bt); | |||
2094 | } | |||
2095 | ||||
2096 | return cursor; | |||
2097 | } | |||
2098 | ||||
2099 | void | |||
2100 | btree_cursor_close(struct cursor *cursor) | |||
2101 | { | |||
2102 | if (cursor != NULL((void *)0)) { | |||
2103 | while (!CURSOR_EMPTY(cursor)(((&(cursor)->stack)->slh_first) == ((void *)0))) | |||
2104 | cursor_pop_page(cursor); | |||
2105 | ||||
2106 | btree_close(cursor->bt); | |||
2107 | free(cursor); | |||
2108 | } | |||
2109 | } | |||
2110 | ||||
2111 | static int | |||
2112 | btree_update_key(struct btree *bt, struct mpage *mp, indx_t indx, | |||
2113 | struct btval *key) | |||
2114 | { | |||
2115 | indx_t ptr, i, numkeys; | |||
2116 | int delta; | |||
2117 | size_t len; | |||
2118 | struct node *node; | |||
2119 | char *base; | |||
2120 | ||||
2121 | node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[indx])); | |||
2122 | ptr = mp->page->ptrs[indx]; | |||
2123 | DPRINTF("update key %u (ofs %u) [%.*s] to [%.*s] on page %u", | |||
2124 | indx, ptr, | |||
2125 | (int)node->ksize, (char *)NODEKEY(node), | |||
2126 | (int)key->size, (char *)key->data, | |||
2127 | mp->pgno); | |||
2128 | ||||
2129 | if (key->size != node->ksize) { | |||
2130 | delta = key->size - node->ksize; | |||
2131 | if (delta > 0 && SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page-> b.fb.fb_lower) < delta) { | |||
2132 | DPRINTF("OUCH! Not enough room, delta = %d", delta); | |||
2133 | return BT_FAIL-1; | |||
2134 | } | |||
2135 | ||||
2136 | numkeys = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); | |||
2137 | for (i = 0; i < numkeys; i++) { | |||
2138 | if (mp->page->ptrs[i] <= ptr) | |||
2139 | mp->page->ptrs[i] -= delta; | |||
2140 | } | |||
2141 | ||||
2142 | base = (char *)mp->page + mp->page->upperb.fb.fb_upper; | |||
2143 | len = ptr - mp->page->upperb.fb.fb_upper + NODESIZE__builtin_offsetof(struct node, data); | |||
2144 | bcopy(base, base - delta, len); | |||
2145 | mp->page->upperb.fb.fb_upper -= delta; | |||
2146 | ||||
2147 | node = NODEPTR(mp, indx)((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[indx])); | |||
2148 | node->ksize = key->size; | |||
2149 | } | |||
2150 | ||||
2151 | bcopy(key->data, NODEKEY(node)(void *)((node)->data), key->size); | |||
2152 | ||||
2153 | return BT_SUCCESS0; | |||
2154 | } | |||
2155 | ||||
2156 | static int | |||
2157 | btree_adjust_prefix(struct btree *bt, struct mpage *src, int delta) | |||
2158 | { | |||
2159 | indx_t i; | |||
2160 | struct node *node; | |||
2161 | struct btkey tmpkey; | |||
2162 | struct btval key; | |||
2163 | ||||
2164 | DPRINTF("adjusting prefix lengths on page %u with delta %d", | |||
2165 | src->pgno, delta); | |||
2166 | assert(delta != 0)((delta != 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2166, __func__, "delta != 0")); | |||
2167 | ||||
2168 | for (i = 0; i < NUMKEYS(src)(((src)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); i++) { | |||
2169 | node = NODEPTR(src, i)((struct node *)((char *)((src)->page) + ((src)->page)-> ptrs[i])); | |||
2170 | tmpkey.len = node->ksize - delta; | |||
2171 | if (delta > 0) { | |||
2172 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) | |||
2173 | bcopy(NODEKEY(node)(void *)((node)->data), tmpkey.str, tmpkey.len); | |||
2174 | else | |||
2175 | bcopy((char *)NODEKEY(node)(void *)((node)->data) + delta, tmpkey.str, | |||
2176 | tmpkey.len); | |||
2177 | } else { | |||
2178 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) { | |||
2179 | bcopy(NODEKEY(node)(void *)((node)->data), tmpkey.str, node->ksize); | |||
2180 | bcopy(src->prefix.str, tmpkey.str + node->ksize, | |||
2181 | -delta); | |||
2182 | } else { | |||
2183 | bcopy(src->prefix.str + src->prefix.len + delta, | |||
2184 | tmpkey.str, -delta); | |||
2185 | bcopy(NODEKEY(node)(void *)((node)->data), tmpkey.str - delta, | |||
2186 | node->ksize); | |||
2187 | } | |||
2188 | } | |||
2189 | key.size = tmpkey.len; | |||
2190 | key.data = tmpkey.str; | |||
2191 | if (btree_update_key(bt, src, i, &key) != BT_SUCCESS0) | |||
2192 | return BT_FAIL-1; | |||
2193 | } | |||
2194 | ||||
2195 | return BT_SUCCESS0; | |||
2196 | } | |||
2197 | ||||
2198 | /* Move a node from src to dst. | |||
2199 | */ | |||
2200 | static int | |||
2201 | btree_move_node(struct btree *bt, struct mpage *src, indx_t srcindx, | |||
2202 | struct mpage *dst, indx_t dstindx) | |||
2203 | { | |||
2204 | int rc; | |||
2205 | unsigned int pfxlen, mp_pfxlen = 0; | |||
2206 | struct node *srcnode; | |||
2207 | struct mpage *mp = NULL((void *)0); | |||
2208 | struct btkey tmpkey, srckey; | |||
2209 | struct btval key, data; | |||
2210 | ||||
2211 | assert(src->parent)((src->parent) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2211, __func__, "src->parent")); | |||
2212 | assert(dst->parent)((dst->parent) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2212, __func__, "dst->parent")); | |||
2213 | ||||
2214 | srcnode = NODEPTR(src, srcindx)((struct node *)((char *)((src)->page) + ((src)->page)-> ptrs[srcindx])); | |||
2215 | DPRINTF("moving %s node %u [%.*s] on page %u to node %u on page %u", | |||
2216 | IS_LEAF(src) ? "leaf" : "branch", | |||
2217 | srcindx, | |||
2218 | (int)srcnode->ksize, (char *)NODEKEY(srcnode), | |||
2219 | src->pgno, | |||
2220 | dstindx, dst->pgno); | |||
2221 | ||||
2222 | find_common_prefix(bt, src); | |||
2223 | ||||
2224 | if (IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) { | |||
2225 | /* Need to check if the page the moved node points to | |||
2226 | * changes prefix. | |||
2227 | */ | |||
2228 | if ((mp = btree_get_mpage(bt, NODEPGNO(srcnode)((srcnode)->p.np_pgno))) == NULL((void *)0)) | |||
2229 | return BT_FAIL-1; | |||
2230 | mp->parent = src; | |||
2231 | mp->parent_index = srcindx; | |||
2232 | find_common_prefix(bt, mp); | |||
2233 | mp_pfxlen = mp->prefix.len; | |||
2234 | } | |||
2235 | ||||
2236 | /* Mark src and dst as dirty. */ | |||
2237 | if ((src = mpage_touch(bt, src)) == NULL((void *)0) || | |||
2238 | (dst = mpage_touch(bt, dst)) == NULL((void *)0)) | |||
2239 | return BT_FAIL-1; | |||
2240 | ||||
2241 | find_common_prefix(bt, dst); | |||
2242 | ||||
2243 | /* Check if src node has destination page prefix. Otherwise the | |||
2244 | * destination page must expand its prefix on all its nodes. | |||
2245 | */ | |||
2246 | srckey.len = srcnode->ksize; | |||
2247 | bcopy(NODEKEY(srcnode)(void *)((srcnode)->data), srckey.str, srckey.len); | |||
2248 | common_prefix(bt, &srckey, &dst->prefix, &tmpkey); | |||
2249 | if (tmpkey.len != dst->prefix.len) { | |||
2250 | if (btree_adjust_prefix(bt, dst, | |||
2251 | tmpkey.len - dst->prefix.len) != BT_SUCCESS0) | |||
2252 | return BT_FAIL-1; | |||
2253 | bcopy(&tmpkey, &dst->prefix, sizeof(tmpkey)); | |||
2254 | } | |||
2255 | ||||
2256 | if (srcindx == 0 && IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) { | |||
2257 | struct mpage *low; | |||
2258 | ||||
2259 | /* must find the lowest key below src | |||
2260 | */ | |||
2261 | assert(btree_search_page_root(bt, src, NULL, NULL, 0,((btree_search_page_root(bt, src, ((void *)0), ((void *)0), 0 , &low) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2262, __func__, "btree_search_page_root(bt, src, NULL, NULL, 0, &low) == BT_SUCCESS" )) | |||
2262 | &low) == BT_SUCCESS)((btree_search_page_root(bt, src, ((void *)0), ((void *)0), 0 , &low) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2262, __func__, "btree_search_page_root(bt, src, NULL, NULL, 0, &low) == BT_SUCCESS" )); | |||
2263 | expand_prefix(bt, low, 0, &srckey); | |||
2264 | DPRINTF("found lowest key [%.*s] on leaf page %u", | |||
2265 | (int)srckey.len, srckey.str, low->pgno); | |||
2266 | } else { | |||
2267 | srckey.len = srcnode->ksize; | |||
2268 | bcopy(NODEKEY(srcnode)(void *)((srcnode)->data), srckey.str, srcnode->ksize); | |||
2269 | } | |||
2270 | find_common_prefix(bt, src); | |||
2271 | ||||
2272 | /* expand the prefix */ | |||
2273 | tmpkey.len = sizeof(tmpkey.str); | |||
2274 | concat_prefix(bt, src->prefix.str, src->prefix.len, | |||
2275 | srckey.str, srckey.len, tmpkey.str, &tmpkey.len); | |||
2276 | ||||
2277 | /* Add the node to the destination page. Adjust prefix for | |||
2278 | * destination page. | |||
2279 | */ | |||
2280 | key.size = tmpkey.len; | |||
2281 | key.data = tmpkey.str; | |||
2282 | remove_prefix(bt, &key, dst->prefix.len); | |||
2283 | data.size = NODEDSZ(srcnode)((srcnode)->p.np_dsize); | |||
2284 | data.data = NODEDATA(srcnode)(void *)((char *)(srcnode)->data + (srcnode)->ksize); | |||
2285 | rc = btree_add_node(bt, dst, dstindx, &key, &data, NODEPGNO(srcnode)((srcnode)->p.np_pgno), | |||
2286 | srcnode->flags); | |||
2287 | if (rc != BT_SUCCESS0) | |||
2288 | return rc; | |||
2289 | ||||
2290 | /* Delete the node from the source page. | |||
2291 | */ | |||
2292 | btree_del_node(bt, src, srcindx); | |||
2293 | ||||
2294 | /* Update the parent separators. | |||
2295 | */ | |||
2296 | if (srcindx == 0 && src->parent_index != 0) { | |||
2297 | expand_prefix(bt, src, 0, &tmpkey); | |||
2298 | key.size = tmpkey.len; | |||
2299 | key.data = tmpkey.str; | |||
2300 | remove_prefix(bt, &key, src->parent->prefix.len); | |||
2301 | ||||
2302 | DPRINTF("update separator for source page %u to [%.*s]", | |||
2303 | src->pgno, (int)key.size, (char *)key.data); | |||
2304 | if (btree_update_key(bt, src->parent, src->parent_index, | |||
2305 | &key) != BT_SUCCESS0) | |||
2306 | return BT_FAIL-1; | |||
2307 | } | |||
2308 | ||||
2309 | if (srcindx == 0 && IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) { | |||
2310 | struct btval nullkey; | |||
2311 | nullkey.size = 0; | |||
2312 | assert(btree_update_key(bt, src, 0, &nullkey) == BT_SUCCESS)((btree_update_key(bt, src, 0, &nullkey) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 2312, __func__, "btree_update_key(bt, src, 0, &nullkey) == BT_SUCCESS")); | |||
2313 | } | |||
2314 | ||||
2315 | if (dstindx == 0 && dst->parent_index != 0) { | |||
2316 | expand_prefix(bt, dst, 0, &tmpkey); | |||
2317 | key.size = tmpkey.len; | |||
2318 | key.data = tmpkey.str; | |||
2319 | remove_prefix(bt, &key, dst->parent->prefix.len); | |||
2320 | ||||
2321 | DPRINTF("update separator for destination page %u to [%.*s]", | |||
2322 | dst->pgno, (int)key.size, (char *)key.data); | |||
2323 | if (btree_update_key(bt, dst->parent, dst->parent_index, | |||
2324 | &key) != BT_SUCCESS0) | |||
2325 | return BT_FAIL-1; | |||
2326 | } | |||
2327 | ||||
2328 | if (dstindx == 0 && IS_BRANCH(dst)((((dst)->page->flags) & (0x01)) == (0x01))) { | |||
2329 | struct btval nullkey; | |||
2330 | nullkey.size = 0; | |||
2331 | assert(btree_update_key(bt, dst, 0, &nullkey) == BT_SUCCESS)((btree_update_key(bt, dst, 0, &nullkey) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 2331, __func__, "btree_update_key(bt, dst, 0, &nullkey) == BT_SUCCESS")); | |||
2332 | } | |||
2333 | ||||
2334 | /* We can get a new page prefix here! | |||
2335 | * Must update keys in all nodes of this page! | |||
2336 | */ | |||
2337 | pfxlen = src->prefix.len; | |||
2338 | find_common_prefix(bt, src); | |||
2339 | if (src->prefix.len != pfxlen) { | |||
2340 | if (btree_adjust_prefix(bt, src, | |||
2341 | src->prefix.len - pfxlen) != BT_SUCCESS0) | |||
2342 | return BT_FAIL-1; | |||
2343 | } | |||
2344 | ||||
2345 | pfxlen = dst->prefix.len; | |||
2346 | find_common_prefix(bt, dst); | |||
2347 | if (dst->prefix.len != pfxlen) { | |||
2348 | if (btree_adjust_prefix(bt, dst, | |||
2349 | dst->prefix.len - pfxlen) != BT_SUCCESS0) | |||
2350 | return BT_FAIL-1; | |||
2351 | } | |||
2352 | ||||
2353 | if (IS_BRANCH(dst)((((dst)->page->flags) & (0x01)) == (0x01))) { | |||
2354 | assert(mp)((mp) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2354, __func__, "mp")); | |||
2355 | mp->parent = dst; | |||
2356 | mp->parent_index = dstindx; | |||
2357 | find_common_prefix(bt, mp); | |||
2358 | if (mp->prefix.len != mp_pfxlen) { | |||
2359 | DPRINTF("moved branch node has changed prefix"); | |||
2360 | if ((mp = mpage_touch(bt, mp)) == NULL((void *)0)) | |||
2361 | return BT_FAIL-1; | |||
2362 | if (btree_adjust_prefix(bt, mp, | |||
2363 | mp->prefix.len - mp_pfxlen) != BT_SUCCESS0) | |||
2364 | return BT_FAIL-1; | |||
2365 | } | |||
2366 | } | |||
2367 | ||||
2368 | return BT_SUCCESS0; | |||
2369 | } | |||
2370 | ||||
2371 | static int | |||
2372 | btree_merge(struct btree *bt, struct mpage *src, struct mpage *dst) | |||
2373 | { | |||
2374 | int rc; | |||
2375 | indx_t i; | |||
2376 | unsigned int pfxlen; | |||
2377 | struct node *srcnode; | |||
2378 | struct btkey tmpkey, dstpfx; | |||
2379 | struct btval key, data; | |||
2380 | ||||
2381 | DPRINTF("merging page %u and %u", src->pgno, dst->pgno); | |||
2382 | ||||
2383 | assert(src->parent)((src->parent) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2383, __func__, "src->parent")); /* can't merge root page */ | |||
2384 | assert(dst->parent)((dst->parent) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2384, __func__, "dst->parent")); | |||
2385 | assert(bt->txn != NULL)((bt->txn != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2385, __func__, "bt->txn != NULL")); | |||
2386 | ||||
2387 | /* Mark src and dst as dirty. */ | |||
2388 | if ((src = mpage_touch(bt, src)) == NULL((void *)0) || | |||
2389 | (dst = mpage_touch(bt, dst)) == NULL((void *)0)) | |||
2390 | return BT_FAIL-1; | |||
2391 | ||||
2392 | find_common_prefix(bt, src); | |||
2393 | find_common_prefix(bt, dst); | |||
2394 | ||||
2395 | /* Check if source nodes has destination page prefix. Otherwise | |||
2396 | * the destination page must expand its prefix on all its nodes. | |||
2397 | */ | |||
2398 | common_prefix(bt, &src->prefix, &dst->prefix, &dstpfx); | |||
2399 | if (dstpfx.len != dst->prefix.len) { | |||
2400 | if (btree_adjust_prefix(bt, dst, | |||
2401 | dstpfx.len - dst->prefix.len) != BT_SUCCESS0) | |||
2402 | return BT_FAIL-1; | |||
2403 | bcopy(&dstpfx, &dst->prefix, sizeof(dstpfx)); | |||
2404 | } | |||
2405 | ||||
2406 | /* Move all nodes from src to dst. | |||
2407 | */ | |||
2408 | for (i = 0; i < NUMKEYS(src)(((src)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); i++) { | |||
2409 | srcnode = NODEPTR(src, i)((struct node *)((char *)((src)->page) + ((src)->page)-> ptrs[i])); | |||
2410 | ||||
2411 | /* If branch node 0 (implicit key), find the real key. | |||
2412 | */ | |||
2413 | if (i == 0 && IS_BRANCH(src)((((src)->page->flags) & (0x01)) == (0x01))) { | |||
2414 | struct mpage *low; | |||
2415 | ||||
2416 | /* must find the lowest key below src | |||
2417 | */ | |||
2418 | assert(btree_search_page_root(bt, src, NULL, NULL, 0,((btree_search_page_root(bt, src, ((void *)0), ((void *)0), 0 , &low) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2419, __func__, "btree_search_page_root(bt, src, NULL, NULL, 0, &low) == BT_SUCCESS" )) | |||
2419 | &low) == BT_SUCCESS)((btree_search_page_root(bt, src, ((void *)0), ((void *)0), 0 , &low) == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2419, __func__, "btree_search_page_root(bt, src, NULL, NULL, 0, &low) == BT_SUCCESS" )); | |||
2420 | expand_prefix(bt, low, 0, &tmpkey); | |||
2421 | DPRINTF("found lowest key [%.*s] on leaf page %u", | |||
2422 | (int)tmpkey.len, tmpkey.str, low->pgno); | |||
2423 | } else { | |||
2424 | expand_prefix(bt, src, i, &tmpkey); | |||
2425 | } | |||
2426 | ||||
2427 | key.size = tmpkey.len; | |||
2428 | key.data = tmpkey.str; | |||
2429 | ||||
2430 | remove_prefix(bt, &key, dst->prefix.len); | |||
2431 | data.size = NODEDSZ(srcnode)((srcnode)->p.np_dsize); | |||
2432 | data.data = NODEDATA(srcnode)(void *)((char *)(srcnode)->data + (srcnode)->ksize); | |||
2433 | rc = btree_add_node(bt, dst, NUMKEYS(dst)(((dst)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1), &key, | |||
2434 | &data, NODEPGNO(srcnode)((srcnode)->p.np_pgno), srcnode->flags); | |||
2435 | if (rc != BT_SUCCESS0) | |||
2436 | return rc; | |||
2437 | } | |||
2438 | ||||
2439 | DPRINTF("dst page %u now has %lu keys (%.1f%% filled)", | |||
2440 | dst->pgno, NUMKEYS(dst), (float)PAGEFILL(bt, dst) / 10); | |||
2441 | ||||
2442 | /* Unlink the src page from parent. | |||
2443 | */ | |||
2444 | btree_del_node(bt, src->parent, src->parent_index); | |||
2445 | if (src->parent_index == 0) { | |||
2446 | key.size = 0; | |||
2447 | if (btree_update_key(bt, src->parent, 0, &key) != BT_SUCCESS0) | |||
2448 | return BT_FAIL-1; | |||
2449 | ||||
2450 | pfxlen = src->prefix.len; | |||
2451 | find_common_prefix(bt, src); | |||
2452 | assert (src->prefix.len == pfxlen)((src->prefix.len == pfxlen) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2452, __func__, "src->prefix.len == pfxlen")); | |||
2453 | } | |||
2454 | ||||
2455 | if (IS_LEAF(src)((((src)->page->flags) & (0x02)) == (0x02))) | |||
2456 | bt->meta.leaf_pages--; | |||
2457 | else | |||
2458 | bt->meta.branch_pages--; | |||
2459 | ||||
2460 | return btree_rebalance(bt, src->parent); | |||
2461 | } | |||
2462 | ||||
2463 | #define FILL_THRESHOLD250 250 | |||
2464 | ||||
2465 | static int | |||
2466 | btree_rebalance(struct btree *bt, struct mpage *mp) | |||
2467 | { | |||
2468 | struct node *node; | |||
2469 | struct mpage *parent; | |||
2470 | struct mpage *root; | |||
2471 | struct mpage *neighbor = NULL((void *)0); | |||
2472 | indx_t si = 0, di = 0; | |||
2473 | ||||
2474 | assert(bt != NULL)((bt != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2474, __func__, "bt != NULL")); | |||
2475 | assert(bt->txn != NULL)((bt->txn != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2475, __func__, "bt->txn != NULL")); | |||
2476 | assert(mp != NULL)((mp != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2476, __func__, "mp != NULL")); | |||
2477 | ||||
2478 | DPRINTF("rebalancing %s page %u (has %lu keys, %.1f%% full)", | |||
2479 | IS_LEAF(mp) ? "leaf" : "branch", | |||
2480 | mp->pgno, NUMKEYS(mp), (float)PAGEFILL(bt, mp) / 10); | |||
2481 | ||||
2482 | if (PAGEFILL(bt, mp)(1000 * ((bt)->head.psize - __builtin_offsetof(struct page , ptrs) - (indx_t)((mp)->page->b.fb.fb_upper - (mp)-> page->b.fb.fb_lower)) / ((bt)->head.psize - __builtin_offsetof (struct page, ptrs))) >= FILL_THRESHOLD250) { | |||
2483 | DPRINTF("no need to rebalance page %u, above fill threshold", | |||
2484 | mp->pgno); | |||
2485 | return BT_SUCCESS0; | |||
2486 | } | |||
2487 | ||||
2488 | parent = mp->parent; | |||
2489 | ||||
2490 | if (parent == NULL((void *)0)) { | |||
2491 | if (NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) == 0) { | |||
2492 | DPRINTF("tree is completely empty"); | |||
2493 | bt->txn->root = P_INVALID0xFFFFFFFF; | |||
2494 | bt->meta.depth--; | |||
2495 | bt->meta.leaf_pages--; | |||
2496 | } else if (IS_BRANCH(mp)((((mp)->page->flags) & (0x01)) == (0x01)) && NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1) == 1) { | |||
2497 | DPRINTF("collapsing root page!"); | |||
2498 | bt->txn->root = NODEPGNO(NODEPTR(mp, 0))((((struct node *)((char *)((mp)->page) + ((mp)->page)-> ptrs[0])))->p.np_pgno); | |||
2499 | if ((root = btree_get_mpage(bt, bt->txn->root)) == NULL((void *)0)) | |||
2500 | return BT_FAIL-1; | |||
2501 | root->parent = NULL((void *)0); | |||
2502 | bt->meta.depth--; | |||
2503 | bt->meta.branch_pages--; | |||
2504 | } else | |||
2505 | DPRINTF("root page doesn't need rebalancing"); | |||
2506 | return BT_SUCCESS0; | |||
2507 | } | |||
2508 | ||||
2509 | /* The parent (branch page) must have at least 2 pointers, | |||
2510 | * otherwise the tree is invalid. | |||
2511 | */ | |||
2512 | assert(NUMKEYS(parent) > 1)(((((parent)->page->b.fb.fb_lower - __builtin_offsetof( struct page, ptrs)) >> 1) > 1) ? (void)0 : __assert2 ("/usr/src/usr.sbin/ldapd/btree.c", 2512, __func__, "NUMKEYS(parent) > 1" )); | |||
2513 | ||||
2514 | /* Leaf page fill factor is below the threshold. | |||
2515 | * Try to move keys from left or right neighbor, or | |||
2516 | * merge with a neighbor page. | |||
2517 | */ | |||
2518 | ||||
2519 | /* Find neighbors. | |||
2520 | */ | |||
2521 | if (mp->parent_index == 0) { | |||
2522 | /* We're the leftmost leaf in our parent. | |||
2523 | */ | |||
2524 | DPRINTF("reading right neighbor"); | |||
2525 | node = NODEPTR(parent, mp->parent_index + 1)((struct node *)((char *)((parent)->page) + ((parent)-> page)->ptrs[mp->parent_index + 1])); | |||
2526 | if ((neighbor = btree_get_mpage(bt, NODEPGNO(node)((node)->p.np_pgno))) == NULL((void *)0)) | |||
2527 | return BT_FAIL-1; | |||
2528 | neighbor->parent_index = mp->parent_index + 1; | |||
2529 | si = 0; | |||
2530 | di = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); | |||
2531 | } else { | |||
2532 | /* There is at least one neighbor to the left. | |||
2533 | */ | |||
2534 | DPRINTF("reading left neighbor"); | |||
2535 | node = NODEPTR(parent, mp->parent_index - 1)((struct node *)((char *)((parent)->page) + ((parent)-> page)->ptrs[mp->parent_index - 1])); | |||
2536 | if ((neighbor = btree_get_mpage(bt, NODEPGNO(node)((node)->p.np_pgno))) == NULL((void *)0)) | |||
2537 | return BT_FAIL-1; | |||
2538 | neighbor->parent_index = mp->parent_index - 1; | |||
2539 | si = NUMKEYS(neighbor)(((neighbor)->page->b.fb.fb_lower - __builtin_offsetof( struct page, ptrs)) >> 1) - 1; | |||
2540 | di = 0; | |||
2541 | } | |||
2542 | neighbor->parent = parent; | |||
2543 | ||||
2544 | DPRINTF("found neighbor page %u (%lu keys, %.1f%% full)", | |||
2545 | neighbor->pgno, NUMKEYS(neighbor), (float)PAGEFILL(bt, neighbor) / 10); | |||
2546 | ||||
2547 | /* If the neighbor page is above threshold and has at least two | |||
2548 | * keys, move one key from it. | |||
2549 | * | |||
2550 | * Otherwise we should try to merge them, but that might not be | |||
2551 | * possible, even if both are below threshold, as prefix expansion | |||
2552 | * might make keys larger. FIXME: detect this | |||
2553 | */ | |||
2554 | if (PAGEFILL(bt, neighbor)(1000 * ((bt)->head.psize - __builtin_offsetof(struct page , ptrs) - (indx_t)((neighbor)->page->b.fb.fb_upper - (neighbor )->page->b.fb.fb_lower)) / ((bt)->head.psize - __builtin_offsetof (struct page, ptrs))) >= FILL_THRESHOLD250 && NUMKEYS(neighbor)(((neighbor)->page->b.fb.fb_lower - __builtin_offsetof( struct page, ptrs)) >> 1) >= 2) | |||
2555 | return btree_move_node(bt, neighbor, si, mp, di); | |||
2556 | else { /* FIXME: if (has_enough_room()) */ | |||
2557 | if (mp->parent_index == 0) | |||
2558 | return btree_merge(bt, neighbor, mp); | |||
2559 | else | |||
2560 | return btree_merge(bt, mp, neighbor); | |||
2561 | } | |||
2562 | } | |||
2563 | ||||
2564 | int | |||
2565 | btree_txn_del(struct btree *bt, struct btree_txn *txn, | |||
2566 | struct btval *key, struct btval *data) | |||
2567 | { | |||
2568 | int rc, exact, close_txn = 0; | |||
2569 | unsigned int ki; | |||
2570 | struct node *leaf; | |||
2571 | struct mpage *mp; | |||
2572 | ||||
2573 | DPRINTF("========> delete key %.*s", (int)key->size, (char *)key->data); | |||
2574 | ||||
2575 | assert(key != NULL)((key != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2575, __func__, "key != NULL")); | |||
2576 | ||||
2577 | if (bt != NULL((void *)0) && txn != NULL((void *)0) && bt != txn->bt) { | |||
2578 | errno(*__errno()) = EINVAL22; | |||
2579 | return BT_FAIL-1; | |||
2580 | } | |||
2581 | ||||
2582 | if (txn != NULL((void *)0) && F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) { | |||
2583 | errno(*__errno()) = EINVAL22; | |||
2584 | return BT_FAIL-1; | |||
2585 | } | |||
2586 | ||||
2587 | if (bt == NULL((void *)0)) { | |||
2588 | if (txn == NULL((void *)0)) { | |||
2589 | errno(*__errno()) = EINVAL22; | |||
2590 | return BT_FAIL-1; | |||
2591 | } | |||
2592 | bt = txn->bt; | |||
2593 | } | |||
2594 | ||||
2595 | if (key->size == 0 || key->size > MAXKEYSIZE255) { | |||
2596 | errno(*__errno()) = EINVAL22; | |||
2597 | return BT_FAIL-1; | |||
2598 | } | |||
2599 | ||||
2600 | if (txn == NULL((void *)0)) { | |||
2601 | close_txn = 1; | |||
2602 | if ((txn = btree_txn_begin(bt, 0)) == NULL((void *)0)) | |||
2603 | return BT_FAIL-1; | |||
2604 | } | |||
2605 | ||||
2606 | if ((rc = btree_search_page(bt, txn, key, NULL((void *)0), 1, &mp)) != BT_SUCCESS0) | |||
2607 | goto done; | |||
2608 | ||||
2609 | leaf = btree_search_node(bt, mp, key, &exact, &ki); | |||
2610 | if (leaf == NULL((void *)0) || !exact) { | |||
2611 | errno(*__errno()) = ENOENT2; | |||
2612 | rc = BT_FAIL-1; | |||
2613 | goto done; | |||
2614 | } | |||
2615 | ||||
2616 | if (data && (rc = btree_read_data(bt, NULL((void *)0), leaf, data)) != BT_SUCCESS0) | |||
2617 | goto done; | |||
2618 | ||||
2619 | btree_del_node(bt, mp, ki); | |||
2620 | bt->meta.entries--; | |||
2621 | rc = btree_rebalance(bt, mp); | |||
2622 | if (rc != BT_SUCCESS0) | |||
2623 | txn->flags |= BT_TXN_ERROR0x02; | |||
2624 | ||||
2625 | done: | |||
2626 | if (close_txn) { | |||
2627 | if (rc == BT_SUCCESS0) | |||
2628 | rc = btree_txn_commit(txn); | |||
2629 | else | |||
2630 | btree_txn_abort(txn); | |||
2631 | } | |||
2632 | mpage_prune(bt); | |||
2633 | return rc; | |||
2634 | } | |||
2635 | ||||
2636 | /* Reduce the length of the prefix separator <sep> to the minimum length that | |||
2637 | * still makes it uniquely distinguishable from <min>. | |||
2638 | * | |||
2639 | * <min> is guaranteed to be sorted less than <sep> | |||
2640 | * | |||
2641 | * On return, <sep> is modified to the minimum length. | |||
2642 | */ | |||
2643 | static void | |||
2644 | bt_reduce_separator(struct btree *bt, struct node *min, struct btval *sep) | |||
2645 | { | |||
2646 | size_t n = 0; | |||
2647 | char *p1; | |||
2648 | char *p2; | |||
2649 | ||||
2650 | if (F_ISSET(bt->flags, BT_REVERSEKEY)(((bt->flags) & (0x08)) == (0x08))) { | |||
2651 | ||||
2652 | assert(sep->size > 0)((sep->size > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2652, __func__, "sep->size > 0")); | |||
2653 | ||||
2654 | p1 = (char *)NODEKEY(min)(void *)((min)->data) + min->ksize - 1; | |||
2655 | p2 = (char *)sep->data + sep->size - 1; | |||
2656 | ||||
2657 | while (p1 >= (char *)NODEKEY(min)(void *)((min)->data) && *p1 == *p2) { | |||
2658 | assert(p2 > (char *)sep->data)((p2 > (char *)sep->data) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2658, __func__, "p2 > (char *)sep->data")); | |||
2659 | p1--; | |||
2660 | p2--; | |||
2661 | n++; | |||
2662 | } | |||
2663 | ||||
2664 | sep->data = p2; | |||
2665 | sep->size = n + 1; | |||
2666 | } else { | |||
2667 | ||||
2668 | assert(min->ksize > 0)((min->ksize > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2668, __func__, "min->ksize > 0")); | |||
2669 | assert(sep->size > 0)((sep->size > 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2669, __func__, "sep->size > 0")); | |||
2670 | ||||
2671 | p1 = (char *)NODEKEY(min)(void *)((min)->data); | |||
2672 | p2 = (char *)sep->data; | |||
2673 | ||||
2674 | while (*p1 == *p2) { | |||
2675 | p1++; | |||
2676 | p2++; | |||
2677 | n++; | |||
2678 | if (n == min->ksize || n == sep->size) | |||
2679 | break; | |||
2680 | } | |||
2681 | ||||
2682 | sep->size = n + 1; | |||
2683 | } | |||
2684 | ||||
2685 | DPRINTF("reduced separator to [%.*s] > [%.*s]", | |||
2686 | (int)sep->size, (char *)sep->data, | |||
2687 | (int)min->ksize, (char *)NODEKEY(min)); | |||
2688 | } | |||
2689 | ||||
2690 | /* Split page <*mpp>, and insert <key,(data|newpgno)> in either left or | |||
2691 | * right sibling, at index <*newindxp> (as if unsplit). Updates *mpp and | |||
2692 | * *newindxp with the actual values after split, ie if *mpp and *newindxp | |||
2693 | * refer to a node in the new right sibling page. | |||
2694 | */ | |||
2695 | static int | |||
2696 | btree_split(struct btree *bt, struct mpage **mpp, unsigned int *newindxp, | |||
2697 | struct btval *newkey, struct btval *newdata, pgno_t newpgno) | |||
2698 | { | |||
2699 | uint8_t flags; | |||
2700 | int rc = BT_SUCCESS0, ins_new = 0; | |||
2701 | indx_t newindx; | |||
2702 | pgno_t pgno = 0; | |||
2703 | size_t orig_pfx_len, left_pfx_diff, right_pfx_diff, pfx_diff; | |||
2704 | unsigned int i, j, split_indx; | |||
2705 | struct node *node; | |||
2706 | struct mpage *pright, *p, *mp; | |||
2707 | struct btval sepkey, rkey, rdata; | |||
2708 | struct btkey tmpkey; | |||
2709 | struct page *copy; | |||
2710 | ||||
2711 | assert(bt != NULL)((bt != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2711, __func__, "bt != NULL")); | |||
2712 | assert(bt->txn != NULL)((bt->txn != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2712, __func__, "bt->txn != NULL")); | |||
2713 | ||||
2714 | mp = *mpp; | |||
2715 | newindx = *newindxp; | |||
2716 | ||||
2717 | DPRINTF("-----> splitting %s page %u and adding [%.*s] at index %d", | |||
2718 | IS_LEAF(mp) ? "leaf" : "branch", mp->pgno, | |||
2719 | (int)newkey->size, (char *)newkey->data, *newindxp); | |||
2720 | DPRINTF("page %u has prefix [%.*s]", mp->pgno, | |||
2721 | (int)mp->prefix.len, (char *)mp->prefix.str); | |||
2722 | orig_pfx_len = mp->prefix.len; | |||
2723 | ||||
2724 | if (mp->parent == NULL((void *)0)) { | |||
2725 | if ((mp->parent = btree_new_page(bt, P_BRANCH0x01)) == NULL((void *)0)) | |||
2726 | return BT_FAIL-1; | |||
2727 | mp->parent_index = 0; | |||
2728 | bt->txn->root = mp->parent->pgno; | |||
2729 | DPRINTF("root split! new root = %u", mp->parent->pgno); | |||
2730 | bt->meta.depth++; | |||
2731 | ||||
2732 | /* Add left (implicit) pointer. */ | |||
2733 | if (btree_add_node(bt, mp->parent, 0, NULL((void *)0), NULL((void *)0), | |||
2734 | mp->pgno, 0) != BT_SUCCESS0) | |||
2735 | return BT_FAIL-1; | |||
2736 | } else { | |||
2737 | DPRINTF("parent branch page is %u", mp->parent->pgno); | |||
2738 | } | |||
2739 | ||||
2740 | /* Create a right sibling. */ | |||
2741 | if ((pright = btree_new_page(bt, mp->page->flags)) == NULL((void *)0)) | |||
2742 | return BT_FAIL-1; | |||
2743 | pright->parent = mp->parent; | |||
2744 | pright->parent_index = mp->parent_index + 1; | |||
2745 | DPRINTF("new right sibling: page %u", pright->pgno); | |||
2746 | ||||
2747 | /* Move half of the keys to the right sibling. */ | |||
2748 | if ((copy = malloc(bt->head.psize)) == NULL((void *)0)) | |||
2749 | return BT_FAIL-1; | |||
2750 | bcopy(mp->page, copy, bt->head.psize); | |||
2751 | assert(mp->ref == 0)((mp->ref == 0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2751, __func__, "mp->ref == 0")); /* XXX */ | |||
2752 | memset(&mp->page->ptrs, 0, bt->head.psize - PAGEHDRSZ__builtin_offsetof(struct page, ptrs)); | |||
2753 | mp->page->lowerb.fb.fb_lower = PAGEHDRSZ__builtin_offsetof(struct page, ptrs); | |||
2754 | mp->page->upperb.fb.fb_upper = bt->head.psize; | |||
2755 | ||||
2756 | split_indx = NUMKEYSP(copy)(((copy)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1) / 2 + 1; | |||
2757 | ||||
2758 | /* First find the separating key between the split pages. | |||
2759 | */ | |||
2760 | memset(&sepkey, 0, sizeof(sepkey)); | |||
2761 | if (newindx == split_indx) { | |||
2762 | sepkey.size = newkey->size; | |||
2763 | sepkey.data = newkey->data; | |||
2764 | remove_prefix(bt, &sepkey, mp->prefix.len); | |||
2765 | } else { | |||
2766 | node = NODEPTRP(copy, split_indx)((struct node *)((char *)(copy) + (copy)->ptrs[split_indx] )); | |||
2767 | sepkey.size = node->ksize; | |||
2768 | sepkey.data = NODEKEY(node)(void *)((node)->data); | |||
2769 | } | |||
2770 | ||||
2771 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) && bt->cmp == NULL((void *)0)) { | |||
2772 | /* Find the smallest separator. */ | |||
2773 | /* Ref: Prefix B-trees, R. Bayer, K. Unterauer, 1977 */ | |||
2774 | node = NODEPTRP(copy, split_indx - 1)((struct node *)((char *)(copy) + (copy)->ptrs[split_indx - 1])); | |||
2775 | bt_reduce_separator(bt, node, &sepkey); | |||
2776 | } | |||
2777 | ||||
2778 | /* Fix separator wrt parent prefix. */ | |||
2779 | if (bt->cmp == NULL((void *)0)) { | |||
2780 | tmpkey.len = sizeof(tmpkey.str); | |||
2781 | concat_prefix(bt, mp->prefix.str, mp->prefix.len, | |||
2782 | sepkey.data, sepkey.size, tmpkey.str, &tmpkey.len); | |||
2783 | sepkey.data = tmpkey.str; | |||
2784 | sepkey.size = tmpkey.len; | |||
2785 | } | |||
2786 | ||||
2787 | DPRINTF("separator is [%.*s]", (int)sepkey.size, (char *)sepkey.data); | |||
2788 | ||||
2789 | /* Copy separator key to the parent. | |||
2790 | */ | |||
2791 | if (SIZELEFT(pright->parent)(indx_t)((pright->parent)->page->b.fb.fb_upper - (pright ->parent)->page->b.fb.fb_lower) < bt_branch_size(bt, &sepkey)) { | |||
2792 | rc = btree_split(bt, &pright->parent, &pright->parent_index, | |||
2793 | &sepkey, NULL((void *)0), pright->pgno); | |||
2794 | ||||
2795 | /* Right page might now have changed parent. | |||
2796 | * Check if left page also changed parent. | |||
2797 | */ | |||
2798 | if (pright->parent != mp->parent && | |||
2799 | mp->parent_index >= NUMKEYS(mp->parent)(((mp->parent)->page->b.fb.fb_lower - __builtin_offsetof (struct page, ptrs)) >> 1)) { | |||
2800 | mp->parent = pright->parent; | |||
2801 | mp->parent_index = pright->parent_index - 1; | |||
2802 | } | |||
2803 | } else { | |||
2804 | remove_prefix(bt, &sepkey, pright->parent->prefix.len); | |||
2805 | rc = btree_add_node(bt, pright->parent, pright->parent_index, | |||
2806 | &sepkey, NULL((void *)0), pright->pgno, 0); | |||
2807 | } | |||
2808 | if (rc != BT_SUCCESS0) { | |||
2809 | free(copy); | |||
2810 | return BT_FAIL-1; | |||
2811 | } | |||
2812 | ||||
2813 | /* Update prefix for right and left page, if the parent was split. | |||
2814 | */ | |||
2815 | find_common_prefix(bt, pright); | |||
2816 | assert(orig_pfx_len <= pright->prefix.len)((orig_pfx_len <= pright->prefix.len) ? (void)0 : __assert2 ("/usr/src/usr.sbin/ldapd/btree.c", 2816, __func__, "orig_pfx_len <= pright->prefix.len" )); | |||
2817 | right_pfx_diff = pright->prefix.len - orig_pfx_len; | |||
2818 | ||||
2819 | find_common_prefix(bt, mp); | |||
2820 | assert(orig_pfx_len <= mp->prefix.len)((orig_pfx_len <= mp->prefix.len) ? (void)0 : __assert2 ("/usr/src/usr.sbin/ldapd/btree.c", 2820, __func__, "orig_pfx_len <= mp->prefix.len" )); | |||
2821 | left_pfx_diff = mp->prefix.len - orig_pfx_len; | |||
2822 | ||||
2823 | for (i = j = 0; i <= NUMKEYSP(copy)(((copy)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1); j++) { | |||
2824 | if (i < split_indx) { | |||
2825 | /* Re-insert in left sibling. */ | |||
2826 | p = mp; | |||
2827 | pfx_diff = left_pfx_diff; | |||
2828 | } else { | |||
2829 | /* Insert in right sibling. */ | |||
2830 | if (i == split_indx) | |||
2831 | /* Reset insert index for right sibling. */ | |||
2832 | j = (i == newindx && ins_new); | |||
2833 | p = pright; | |||
2834 | pfx_diff = right_pfx_diff; | |||
2835 | } | |||
2836 | ||||
2837 | if (i == newindx && !ins_new) { | |||
2838 | /* Insert the original entry that caused the split. */ | |||
2839 | rkey.data = newkey->data; | |||
2840 | rkey.size = newkey->size; | |||
2841 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | |||
2842 | rdata.data = newdata->data; | |||
2843 | rdata.size = newdata->size; | |||
2844 | } else | |||
2845 | pgno = newpgno; | |||
2846 | flags = 0; | |||
2847 | pfx_diff = p->prefix.len; | |||
2848 | ||||
2849 | ins_new = 1; | |||
2850 | ||||
2851 | /* Update page and index for the new key. */ | |||
2852 | *newindxp = j; | |||
2853 | *mpp = p; | |||
2854 | } else if (i == NUMKEYSP(copy)(((copy)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1)) { | |||
2855 | break; | |||
2856 | } else { | |||
2857 | node = NODEPTRP(copy, i)((struct node *)((char *)(copy) + (copy)->ptrs[i])); | |||
2858 | rkey.data = NODEKEY(node)(void *)((node)->data); | |||
2859 | rkey.size = node->ksize; | |||
2860 | if (IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02))) { | |||
2861 | rdata.data = NODEDATA(node)(void *)((char *)(node)->data + (node)->ksize); | |||
2862 | rdata.size = node->n_dsizep.np_dsize; | |||
2863 | } else | |||
2864 | pgno = node->n_pgnop.np_pgno; | |||
2865 | flags = node->flags; | |||
2866 | ||||
2867 | i++; | |||
2868 | } | |||
2869 | ||||
2870 | if (!IS_LEAF(mp)((((mp)->page->flags) & (0x02)) == (0x02)) && j == 0) { | |||
2871 | /* First branch index doesn't need key data. */ | |||
2872 | rkey.size = 0; | |||
2873 | } else | |||
2874 | remove_prefix(bt, &rkey, pfx_diff); | |||
2875 | ||||
2876 | rc = btree_add_node(bt, p, j, &rkey, &rdata, pgno,flags); | |||
2877 | } | |||
2878 | ||||
2879 | free(copy); | |||
2880 | return rc; | |||
2881 | } | |||
2882 | ||||
2883 | int | |||
2884 | btree_txn_put(struct btree *bt, struct btree_txn *txn, | |||
2885 | struct btval *key, struct btval *data, unsigned int flags) | |||
2886 | { | |||
2887 | int rc = BT_SUCCESS0, exact, close_txn = 0; | |||
2888 | unsigned int ki; | |||
2889 | struct node *leaf; | |||
2890 | struct mpage *mp; | |||
2891 | struct btval xkey; | |||
2892 | ||||
2893 | assert(key != NULL)((key != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2893, __func__, "key != NULL")); | |||
2894 | assert(data != NULL)((data != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 2894, __func__, "data != NULL")); | |||
2895 | ||||
2896 | if (bt != NULL((void *)0) && txn != NULL((void *)0) && bt != txn->bt) { | |||
2897 | errno(*__errno()) = EINVAL22; | |||
2898 | return BT_FAIL-1; | |||
2899 | } | |||
2900 | ||||
2901 | if (txn != NULL((void *)0) && F_ISSET(txn->flags, BT_TXN_RDONLY)(((txn->flags) & (0x01)) == (0x01))) { | |||
2902 | errno(*__errno()) = EINVAL22; | |||
2903 | return BT_FAIL-1; | |||
2904 | } | |||
2905 | ||||
2906 | if (bt == NULL((void *)0)) { | |||
2907 | if (txn == NULL((void *)0)) { | |||
2908 | errno(*__errno()) = EINVAL22; | |||
2909 | return BT_FAIL-1; | |||
2910 | } | |||
2911 | bt = txn->bt; | |||
2912 | } | |||
2913 | ||||
2914 | if (key->size == 0 || key->size > MAXKEYSIZE255) { | |||
2915 | errno(*__errno()) = EINVAL22; | |||
2916 | return BT_FAIL-1; | |||
2917 | } | |||
2918 | ||||
2919 | DPRINTF("==> put key %.*s, size %zu, data size %zu", | |||
2920 | (int)key->size, (char *)key->data, key->size, data->size); | |||
2921 | ||||
2922 | if (txn == NULL((void *)0)) { | |||
2923 | close_txn = 1; | |||
2924 | if ((txn = btree_txn_begin(bt, 0)) == NULL((void *)0)) | |||
2925 | return BT_FAIL-1; | |||
2926 | } | |||
2927 | ||||
2928 | rc = btree_search_page(bt, txn, key, NULL((void *)0), 1, &mp); | |||
2929 | if (rc == BT_SUCCESS0) { | |||
2930 | leaf = btree_search_node(bt, mp, key, &exact, &ki); | |||
2931 | if (leaf && exact) { | |||
2932 | if (F_ISSET(flags, BT_NOOVERWRITE)(((flags) & (1)) == (1))) { | |||
2933 | DPRINTF("duplicate key %.*s", | |||
2934 | (int)key->size, (char *)key->data); | |||
2935 | errno(*__errno()) = EEXIST17; | |||
2936 | rc = BT_FAIL-1; | |||
2937 | goto done; | |||
2938 | } | |||
2939 | btree_del_node(bt, mp, ki); | |||
2940 | } | |||
2941 | if (leaf == NULL((void *)0)) { /* append if not found */ | |||
2942 | ki = NUMKEYS(mp)(((mp)->page->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs)) >> 1); | |||
2943 | DPRINTF("appending key at index %d", ki); | |||
2944 | } | |||
2945 | } else if (errno(*__errno()) == ENOENT2) { | |||
2946 | /* new file, just write a root leaf page */ | |||
2947 | DPRINTF("allocating new root leaf page"); | |||
2948 | if ((mp = btree_new_page(bt, P_LEAF0x02)) == NULL((void *)0)) { | |||
2949 | rc = BT_FAIL-1; | |||
2950 | goto done; | |||
2951 | } | |||
2952 | txn->root = mp->pgno; | |||
2953 | bt->meta.depth++; | |||
2954 | ki = 0; | |||
2955 | } | |||
2956 | else | |||
2957 | goto done; | |||
2958 | ||||
2959 | assert(IS_LEAF(mp))((((((mp)->page->flags) & (0x02)) == (0x02))) ? (void )0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 2959, __func__ , "IS_LEAF(mp)")); | |||
2960 | DPRINTF("there are %lu keys, should insert new key at index %u", | |||
2961 | NUMKEYS(mp), ki); | |||
2962 | ||||
2963 | /* Copy the key pointer as it is modified by the prefix code. The | |||
2964 | * caller might have malloc'ed the data. | |||
2965 | */ | |||
2966 | xkey.data = key->data; | |||
2967 | xkey.size = key->size; | |||
2968 | ||||
2969 | if (SIZELEFT(mp)(indx_t)((mp)->page->b.fb.fb_upper - (mp)->page-> b.fb.fb_lower) < bt_leaf_size(bt, key, data)) { | |||
2970 | rc = btree_split(bt, &mp, &ki, &xkey, data, P_INVALID0xFFFFFFFF); | |||
2971 | } else { | |||
2972 | /* There is room already in this leaf page. */ | |||
2973 | remove_prefix(bt, &xkey, mp->prefix.len); | |||
2974 | rc = btree_add_node(bt, mp, ki, &xkey, data, 0, 0); | |||
2975 | } | |||
2976 | ||||
2977 | if (rc != BT_SUCCESS0) | |||
2978 | txn->flags |= BT_TXN_ERROR0x02; | |||
2979 | else | |||
2980 | bt->meta.entries++; | |||
2981 | ||||
2982 | done: | |||
2983 | if (close_txn) { | |||
2984 | if (rc == BT_SUCCESS0) | |||
2985 | rc = btree_txn_commit(txn); | |||
2986 | else | |||
2987 | btree_txn_abort(txn); | |||
2988 | } | |||
2989 | mpage_prune(bt); | |||
2990 | return rc; | |||
2991 | } | |||
2992 | ||||
2993 | static pgno_t | |||
2994 | btree_compact_tree(struct btree *bt, pgno_t pgno, struct btree *btc) | |||
2995 | { | |||
2996 | ssize_t rc; | |||
2997 | indx_t i; | |||
2998 | pgno_t *pnext, next; | |||
2999 | struct node *node; | |||
3000 | struct page *p; | |||
3001 | struct mpage *mp; | |||
3002 | ||||
3003 | /* Get the page and make a copy of it. | |||
3004 | */ | |||
3005 | if ((mp = btree_get_mpage(bt, pgno)) == NULL((void *)0)) | |||
3006 | return P_INVALID0xFFFFFFFF; | |||
3007 | if ((p = malloc(bt->head.psize)) == NULL((void *)0)) | |||
3008 | return P_INVALID0xFFFFFFFF; | |||
3009 | bcopy(mp->page, p, bt->head.psize); | |||
3010 | ||||
3011 | /* Go through all nodes in the (copied) page and update the | |||
3012 | * page pointers. | |||
3013 | */ | |||
3014 | if (F_ISSET(p->flags, P_BRANCH)(((p->flags) & (0x01)) == (0x01))) { | |||
3015 | for (i = 0; i < NUMKEYSP(p)(((p)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1); i++) { | |||
3016 | node = NODEPTRP(p, i)((struct node *)((char *)(p) + (p)->ptrs[i])); | |||
3017 | node->n_pgnop.np_pgno = btree_compact_tree(bt, node->n_pgnop.np_pgno, btc); | |||
3018 | if (node->n_pgnop.np_pgno == P_INVALID0xFFFFFFFF) { | |||
3019 | free(p); | |||
3020 | return P_INVALID0xFFFFFFFF; | |||
3021 | } | |||
3022 | } | |||
3023 | } else if (F_ISSET(p->flags, P_LEAF)(((p->flags) & (0x02)) == (0x02))) { | |||
3024 | for (i = 0; i < NUMKEYSP(p)(((p)->b.fb.fb_lower - __builtin_offsetof(struct page, ptrs )) >> 1); i++) { | |||
3025 | node = NODEPTRP(p, i)((struct node *)((char *)(p) + (p)->ptrs[i])); | |||
3026 | if (F_ISSET(node->flags, F_BIGDATA)(((node->flags) & (0x01)) == (0x01))) { | |||
3027 | bcopy(NODEDATA(node)(void *)((char *)(node)->data + (node)->ksize), &next, sizeof(next)); | |||
3028 | next = btree_compact_tree(bt, next, btc); | |||
3029 | if (next == P_INVALID0xFFFFFFFF) { | |||
3030 | free(p); | |||
3031 | return P_INVALID0xFFFFFFFF; | |||
3032 | } | |||
3033 | bcopy(&next, NODEDATA(node)(void *)((char *)(node)->data + (node)->ksize), sizeof(next)); | |||
3034 | } | |||
3035 | } | |||
3036 | } else if (F_ISSET(p->flags, P_OVERFLOW)(((p->flags) & (0x04)) == (0x04))) { | |||
3037 | pnext = &p->p_next_pgnob.pb_next_pgno; | |||
3038 | if (*pnext > 0) { | |||
3039 | *pnext = btree_compact_tree(bt, *pnext, btc); | |||
3040 | if (*pnext == P_INVALID0xFFFFFFFF) { | |||
3041 | free(p); | |||
3042 | return P_INVALID0xFFFFFFFF; | |||
3043 | } | |||
3044 | } | |||
3045 | } else | |||
3046 | assert(0)((0) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c", 3046, __func__, "0")); | |||
3047 | ||||
3048 | pgno = p->pgno = btc->txn->next_pgno++; | |||
3049 | rc = write(btc->fd, p, bt->head.psize); | |||
3050 | free(p); | |||
3051 | if (rc != (ssize_t)bt->head.psize) | |||
3052 | return P_INVALID0xFFFFFFFF; | |||
3053 | mpage_prune(bt); | |||
3054 | return pgno; | |||
3055 | } | |||
3056 | ||||
3057 | int | |||
3058 | btree_compact(struct btree *bt) | |||
3059 | { | |||
3060 | char *compact_path = NULL((void *)0); | |||
3061 | struct btree *btc; | |||
3062 | struct btree_txn *txn, *txnc = NULL((void *)0); | |||
3063 | int fd; | |||
3064 | pgno_t root; | |||
3065 | ||||
3066 | assert(bt != NULL)((bt != ((void *)0)) ? (void)0 : __assert2("/usr/src/usr.sbin/ldapd/btree.c" , 3066, __func__, "bt != NULL")); | |||
3067 | ||||
3068 | DPRINTF("compacting btree %p with path %s", bt, bt->path); | |||
3069 | ||||
3070 | if (bt->path == NULL((void *)0)) { | |||
3071 | errno(*__errno()) = EINVAL22; | |||
3072 | return BT_FAIL-1; | |||
3073 | } | |||
3074 | ||||
3075 | if ((txn = btree_txn_begin(bt, 0)) == NULL((void *)0)) | |||
3076 | return BT_FAIL-1; | |||
3077 | ||||
3078 | if (asprintf(&compact_path, "%s.compact.XXXXXX", bt->path) == -1) { | |||
3079 | btree_txn_abort(txn); | |||
3080 | return BT_FAIL-1; | |||
3081 | } | |||
3082 | fd = mkstemp(compact_path); | |||
3083 | if (fd == -1) { | |||
3084 | free(compact_path); | |||
3085 | btree_txn_abort(txn); | |||
3086 | return BT_FAIL-1; | |||
3087 | } | |||
3088 | ||||
3089 | if ((btc = btree_open_fd(fd, 0)) == NULL((void *)0)) | |||
3090 | goto failed; | |||
3091 | bcopy(&bt->meta, &btc->meta, sizeof(bt->meta)); | |||
3092 | btc->meta.revisions = 0; | |||
3093 | ||||
3094 | if ((txnc = btree_txn_begin(btc, 0)) == NULL((void *)0)) | |||
3095 | goto failed; | |||
3096 | ||||
3097 | if (bt->meta.root != P_INVALID0xFFFFFFFF) { | |||
3098 | root = btree_compact_tree(bt, bt->meta.root, btc); | |||
3099 | if (root == P_INVALID0xFFFFFFFF) | |||
3100 | goto failed; | |||
3101 | if (btree_write_meta(btc, root, 0) != BT_SUCCESS0) | |||
3102 | goto failed; | |||
3103 | } | |||
3104 | ||||
3105 | fsync(fd); | |||
3106 | ||||
3107 | DPRINTF("renaming %s to %s", compact_path, bt->path); | |||
3108 | if (rename(compact_path, bt->path) != 0) | |||
3109 | goto failed; | |||
3110 | ||||
3111 | /* Write a "tombstone" meta page so other processes can pick up | |||
3112 | * the change and re-open the file. | |||
3113 | */ | |||
3114 | if (btree_write_meta(bt, P_INVALID0xFFFFFFFF, BT_TOMBSTONE0x01) != BT_SUCCESS0) | |||
3115 | goto failed; | |||
3116 | ||||
3117 | btree_txn_abort(txn); | |||
3118 | btree_txn_abort(txnc); | |||
3119 | free(compact_path); | |||
3120 | btree_close(btc); | |||
3121 | mpage_prune(bt); | |||
3122 | return 0; | |||
3123 | ||||
3124 | failed: | |||
3125 | btree_txn_abort(txn); | |||
3126 | btree_txn_abort(txnc); | |||
3127 | unlink(compact_path); | |||
3128 | free(compact_path); | |||
3129 | btree_close(btc); | |||
3130 | mpage_prune(bt); | |||
3131 | return BT_FAIL-1; | |||
3132 | } | |||
3133 | ||||
3134 | /* Reverts the last change. Truncates the file at the last root page. | |||
3135 | */ | |||
3136 | int | |||
3137 | btree_revert(struct btree *bt) | |||
3138 | { | |||
3139 | if (btree_read_meta(bt, NULL((void *)0)) != 0) | |||
3140 | return -1; | |||
3141 | ||||
3142 | DPRINTF("truncating file at page %u", bt->meta.root); | |||
3143 | return ftruncate(bt->fd, bt->head.psize * bt->meta.root); | |||
3144 | } | |||
3145 | ||||
3146 | void | |||
3147 | btree_set_cache_size(struct btree *bt, unsigned int cache_size) | |||
3148 | { | |||
3149 | bt->stat.max_cache = cache_size; | |||
3150 | } | |||
3151 | ||||
3152 | unsigned int | |||
3153 | btree_get_flags(struct btree *bt) | |||
3154 | { | |||
3155 | return (bt->flags & ~BT_FIXPADDING0x01); | |||
3156 | } | |||
3157 | ||||
3158 | const char * | |||
3159 | btree_get_path(struct btree *bt) | |||
3160 | { | |||
3161 | return bt->path; | |||
3162 | } | |||
3163 | ||||
3164 | const struct btree_stat * | |||
3165 | btree_stat(struct btree *bt) | |||
3166 | { | |||
3167 | if (bt == NULL((void *)0)) | |||
3168 | return NULL((void *)0); | |||
3169 | ||||
3170 | bt->stat.branch_pages = bt->meta.branch_pages; | |||
3171 | bt->stat.leaf_pages = bt->meta.leaf_pages; | |||
3172 | bt->stat.overflow_pages = bt->meta.overflow_pages; | |||
3173 | bt->stat.revisions = bt->meta.revisions; | |||
3174 | bt->stat.depth = bt->meta.depth; | |||
3175 | bt->stat.entries = bt->meta.entries; | |||
3176 | bt->stat.psize = bt->head.psize; | |||
3177 | bt->stat.created_at = bt->meta.created_at; | |||
3178 | ||||
3179 | return &bt->stat; | |||
3180 | } | |||
3181 |