From dc46f82c49dad5103f30a2b5478beff3902e07da Mon Sep 17 00:00:00 2001
From: Konrad Wojas
Date: Tue, 20 Apr 2021 15:30:44 +0800
Subject: [PATCH 1/3] Add update-lmdb.sh to ease C lib upgrades

---
 update-lmdb.sh | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100755 update-lmdb.sh

diff --git a/update-lmdb.sh b/update-lmdb.sh
new file mode 100755
index 00000000..7ded309b
--- /dev/null
+++ b/update-lmdb.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+#
+# Script to update our copy of the LMDB library and headers
+# Releases can be found here: https://github.com/LMDB/lmdb/releases
+#
+# Usage: ./update-lmdb.sh <version>   (run from the repository root)
+#
+
+# Print the third field of the first "#define $1 ..." line in lmdb/lmdb.h.
+get_define() {
+  grep "^#define $1" lmdb/lmdb.h | head -1 | awk '{print $3}'
+}
+
+# Print the vendored LMDB version as MAJOR.MINOR.PATCH.
+get_version() {
+  echo "$(get_define MDB_VERSION_MAJOR).$(get_define MDB_VERSION_MINOR).$(get_define MDB_VERSION_PATCH)"
+}
+
+# Remove the download directory. Installed as an EXIT trap so it also runs
+# when "set -e" aborts the script after a failed download or copy.
+cleanup() {
+  if [ -n "$tmp_dir" ]; then
+    echo "Removing temp dir: $tmp_dir"
+    rm -rf "$tmp_dir"
+  fi
+}
+
+cur_version="$(get_version)"
+echo "Current LMDB version: $cur_version"
+echo
+
+version="$1"
+
+if [ -z "$version" ]; then
+  echo "USAGE: $0 <version>"
+  echo "Check https://github.com/LMDB/lmdb/releases for available versions"
+  exit 1
+fi
+
+set -ex
+
+# An explicit XXXXXX template is required by GNU mktemp and accepted by BSD/macOS.
+tmp_dir=$(mktemp -d -t lmdb-update.XXXXXX)
+trap cleanup EXIT
+echo "Temp dir: $tmp_dir"
+
+# -f makes curl fail on HTTP errors (e.g. an unknown tag) instead of feeding an error page to tar.
+curl -fL "https://github.com/LMDB/lmdb/archive/refs/tags/LMDB_${version}.tar.gz" | tar -C "$tmp_dir" -xvz
+cp "$tmp_dir/lmdb-LMDB_${version}/libraries/liblmdb/mdb.c" lmdb/mdb.c
+cp "$tmp_dir/lmdb-LMDB_${version}/libraries/liblmdb/lmdb.h" lmdb/lmdb.h
+cp "$tmp_dir/lmdb-LMDB_${version}/libraries/liblmdb/CHANGES" CHANGES.lmdb.txt
+
+set +ex
+echo
+new_version="$(get_version)"
+echo "New LMDB version: $new_version"
+echo
+echo "NOTE: Do not forget to include the upstream changelog from $cur_version to $new_version from"
+echo "      CHANGES.lmdb.txt in our CHANGES.md, and do not forget to test!"
+ + From a12cacae22d244efdfeafd2d41f04db14b9410b5 Mon Sep 17 00:00:00 2001 From: Konrad Wojas Date: Tue, 20 Apr 2021 15:31:32 +0800 Subject: [PATCH 2/3] Upgrade LMDB C lib from 0.9.19 to 0.9.28 --- CHANGES.lmdb.txt | 262 +++++++++++++++++++++++++++++++ CHANGES.md | 51 ++++++ lmdb/lmdb.h | 16 +- lmdb/mdb.c | 394 +++++++++++++++++++++++++++++------------------ 4 files changed, 563 insertions(+), 160 deletions(-) create mode 100644 CHANGES.lmdb.txt diff --git a/CHANGES.lmdb.txt b/CHANGES.lmdb.txt new file mode 100644 index 00000000..d1f8307f --- /dev/null +++ b/CHANGES.lmdb.txt @@ -0,0 +1,262 @@ +LMDB 0.9 Change Log + +LMDB 0.9.28 Release (2021/02/04) + ITS#8662 add -a append option to mdb_load + +LMDB 0.9.27 Release (2020/10/26) + ITS#9376 fix repeated DUPSORT cursor deletes + +LMDB 0.9.26 Release (2020/08/11) + ITS#9278 fix robust mutex cleanup for FreeBSD + +LMDB 0.9.25 Release (2020/01/30) + ITS#9068 fix mdb_dump/load backslashes in printable content + ITS#9118 add MAP_NOSYNC for FreeBSD + ITS#9155 free mt_spill_pgs in non-nested txn on end + +LMDB 0.9.24 Release (2019/07/24) + ITS#8969 Tweak mdb_page_split + ITS#8975 WIN32 fix writemap set_mapsize crash + ITS#9007 Fix loose pages in WRITEMAP + +LMDB 0.9.23 Release (2018/12/19) + ITS#8756 Fix loose pages in dirty list + ITS#8831 Fix mdb_load flag init + ITS#8844 Fix mdb_env_close in forked process + Documentation + ITS#8857 mdb_cursor_del doesn't invalidate cursor + ITS#8908 GET_MULTIPLE etc don't change passed in key + +LMDB 0.9.22 Release (2018/03/22) + Fix MDB_DUPSORT alignment bug (ITS#8819) + Fix regression with new db from 0.9.19 (ITS#8760) + Fix liblmdb to build on Solaris (ITS#8612) + Fix delete behavior with DUPSORT DB (ITS#8622) + Fix mdb_cursor_get/mdb_cursor_del behavior (ITS#8722) + +LMDB 0.9.21 Release (2017/06/01) + Fix xcursor after cursor_del (ITS#8622) + +LMDB 0.9.20 (Withdrawn) + Fix mdb_load with escaped plaintext (ITS#8558) + Fix mdb_cursor_last / mdb_put interaction (ITS#8557) + +LMDB 0.9.19 
Release (2016/12/28) + Fix mdb_env_cwalk cursor init (ITS#8424) + Fix robust mutexes on Solaris 10/11 (ITS#8339) + Tweak Win32 error message buffer + Fix MDB_GET_BOTH on non-dup record (ITS#8393) + Optimize mdb_drop + Fix xcursors after mdb_cursor_del (ITS#8406) + Fix MDB_NEXT_DUP after mdb_cursor_del (ITS#8412) + Fix mdb_cursor_put resetting C_EOF (ITS#8489) + Fix mdb_env_copyfd2 to return EPIPE on SIGPIPE (ITS#8504) + Fix mdb_env_copy with empty DB (ITS#8209) + Fix behaviors with fork (ITS#8505) + Fix mdb_dbi_open with mainDB cursors (ITS#8542) + Fix robust mutexes on kFreeBSD (ITS#8554) + Fix utf8_to_utf16 error checks (ITS#7992) + Fix F_NOCACHE on MacOS, error is non-fatal (ITS#7682) + Build + Make shared lib suffix overridable (ITS#8481) + Documentation + Cleanup doxygen nits + Note reserved vs actual mem/disk usage + + +LMDB 0.9.18 Release (2016/02/05) + Fix robust mutex detection on glibc 2.10-11 (ITS#8330) + Fix page_search_root assert on FreeDB (ITS#8336) + Fix MDB_APPENDDUP vs. 
rewrite(single item) (ITS#8334) + Fix mdb_copy of large files on Windows + Fix subcursor move after delete (ITS#8355) + Fix mdb_midl_shirnk off-by-one (ITS#8363) + Check for utf8_to_utf16 failures (ITS#7992) + Catch strdup failure in mdb_dbi_open + Build + Additional makefile var tweaks (ITS#8169) + Documentation + Add Getting Started page + Update WRITEMAP description + + +LMDB 0.9.17 Release (2015/11/30) + Fix ITS#7377 catch calloc failure + Fix ITS#8237 regression from ITS#7589 + Fix ITS#8238 page_split for DUPFIXED pages + Fix ITS#8221 MDB_PAGE_FULL on delete/rebalance + Fix ITS#8258 rebalance/split assert + Fix ITS#8263 cursor_put cursor tracking + Fix ITS#8264 cursor_del cursor tracking + Fix ITS#8310 cursor_del cursor tracking + Fix ITS#8299 mdb_del cursor tracking + Fix ITS#8300 mdb_del cursor tracking + Fix ITS#8304 mdb_del cursor tracking + Fix ITS#7771 fakepage cursor tracking + Fix ITS#7789 ensure mapsize >= pages in use + Fix ITS#7971 mdb_txn_renew0() new reader slots + Fix ITS#7969 use __sync_synchronize on non-x86 + Fix ITS#8311 page_split from update_key + Fix ITS#8312 loose pages in nested txn + Fix ITS#8313 mdb_rebalance dummy cursor + Fix ITS#8315 dirty_room in nested txn + Fix ITS#8323 dirty_list in nested txn + Fix ITS#8316 page_merge cursor tracking + Fix ITS#8321 cursor tracking + Fix ITS#8319 mdb_load error messages + Fix ITS#8320 mdb_load plaintext input + Added mdb_txn_id() (ITS#7994) + Added robust mutex support + Miscellaneous cleanup/simplification + Build + Create install dirs if needed (ITS#8256) + Fix ThreadProc decl on Win32/MSVC (ITS#8270) + Added ssize_t typedef for MSVC (ITS#8067) + Use ANSI apis on Windows (ITS#8069) + Use O_SYNC if O_DSYNC,MDB_DSYNC are not defined (ITS#7209) + Allow passing AR to make (ITS#8168) + Allow passing mandir to make install (ITS#8169) + +LMDB 0.9.16 Release (2015/08/14) + Fix cursor EOF bug (ITS#8190) + Fix handling of subDB records (ITS#8181) + Fix mdb_midl_shrink() usage (ITS#8200) + +LMDB 0.9.15 
Release (2015/06/19) + Fix txn init (ITS#7961,#7987) + Fix MDB_PREV_DUP (ITS#7955,#7671) + Fix compact of empty env (ITS#7956) + Fix mdb_copy file mode + Fix mdb_env_close() after failed mdb_env_open() + Fix mdb_rebalance collapsing root (ITS#8062) + Fix mdb_load with large values (ITS#8066) + Fix to retry writes on EINTR (ITS#8106) + Fix mdb_cursor_del on empty DB (ITS#8109) + Fix MDB_INTEGERDUP key compare (ITS#8117) + Fix error handling (ITS#7959,#8157,etc.) + Fix race conditions (ITS#7969,7970) + Added workaround for fdatasync bug in ext3fs + Build + Don't use -fPIC for static lib + Update .gitignore (ITS#7952,#7953) + Cleanup for "make test" (ITS#7841), "make clean", mtest*.c + Misc. Android/Windows cleanup + Documentation + Fix MDB_APPEND doc + Fix MDB_MAXKEYSIZE doc (ITS#8156) + Fix mdb_cursor_put,mdb_cursor_del EACCES description + Fix mdb_env_sync(MDB_RDONLY env) doc (ITS#8021) + Clarify MDB_WRITEMAP doc (ITS#8021) + Clarify mdb_env_open doc + Clarify mdb_dbi_open doc + +LMDB 0.9.14 Release (2014/09/20) + Fix to support 64K page size (ITS#7713) + Fix to persist decreased as well as increased mapsizes (ITS#7789) + Fix cursor bug when deleting last node of a DUPSORT key + Fix mdb_env_info to return FIXEDMAP address + Fix ambiguous error code from writing to closed DBI (ITS#7825) + Fix mdb_copy copying past end of file (ITS#7886) + Fix cursor bugs from page_merge/rebalance + Fix to dirty fewer pages in deletes (mdb_page_loose()) + Fix mdb_dbi_open creating subDBs (ITS#7917) + Fix mdb_cursor_get(_DUP) with single value (ITS#7913) + Fix Windows compat issues in mtests (ITS#7879) + Add compacting variant of mdb_copy + Add BigEndian integer key compare code + Add mdb_dump/mdb_load utilities + +LMDB 0.9.13 Release (2014/06/18) + Fix mdb_page_alloc unlimited overflow page search + Documentation + Re-fix MDB_CURRENT doc (ITS#7793) + Fix MDB_GET_MULTIPLE/MDB_NEXT_MULTIPLE doc + +LMDB 0.9.12 Release (2014/06/13) + Fix MDB_GET_BOTH regression (ITS#7875,#7681) + Fix 
MDB_MULTIPLE writing multiple keys (ITS#7834) + Fix mdb_rebalance (ITS#7829) + Fix mdb_page_split (ITS#7815) + Fix md_entries count (ITS#7861,#7828,#7793) + Fix MDB_CURRENT (ITS#7793) + Fix possible crash on Windows DLL detach + Misc code cleanup + Documentation + mdb_cursor_put: cursor moves on error (ITS#7771) + + +LMDB 0.9.11 Release (2014/01/15) + Add mdb_env_set_assert() (ITS#7775) + Fix: invalidate txn on page allocation errors (ITS#7377) + Fix xcursor tracking in mdb_cursor_del0() (ITS#7771) + Fix corruption from deletes (ITS#7756) + Fix Windows/MSVC build issues + Raise safe limit of max MDB_MAXKEYSIZE + Misc code cleanup + Documentation + Remove spurious note about non-overlapping flags (ITS#7665) + +LMDB 0.9.10 Release (2013/11/12) + Add MDB_NOMEMINIT option + Fix mdb_page_split() again (ITS#7589) + Fix MDB_NORDAHEAD definition (ITS#7734) + Fix mdb_cursor_del() positioning (ITS#7733) + Partial fix for larger page sizes (ITS#7713) + Fix Windows64/MSVC build issues + +LMDB 0.9.9 Release (2013/10/24) + Add mdb_env_get_fd() + Add MDB_NORDAHEAD option + Add MDB_NOLOCK option + Avoid wasting space in mdb_page_split() (ITS#7589) + Fix mdb_page_merge() cursor fixup (ITS#7722) + Fix mdb_cursor_del() on last delete (ITS#7718) + Fix adding WRITEMAP on existing env (ITS#7715) + Fix nested txns (ITS#7515) + Fix mdb_env_copy() O_DIRECT bug (ITS#7682) + Fix mdb_cursor_set(SET_RANGE) return code (ITS#7681) + Fix mdb_rebalance() cursor fixup (ITS#7701) + Misc code cleanup + Documentation + Note that by default, readers need write access + + +LMDB 0.9.8 Release (2013/09/09) + Allow mdb_env_set_mapsize() on an open environment + Fix mdb_dbi_flags() (ITS#7672) + Fix mdb_page_unspill() in nested txns + Fix mdb_cursor_get(CURRENT|NEXT) after a delete + Fix mdb_cursor_get(DUP) to always return key (ITS#7671) + Fix mdb_cursor_del() to always advance to next item (ITS#7670) + Fix mdb_cursor_set(SET_RANGE) for tree with single page (ITS#7681) + Fix mdb_env_copy() retry open if 
O_DIRECT fails (ITS#7682) + Tweak mdb_page_spill() to be less aggressive + Documentation + Update caveats since mdb_reader_check() added in 0.9.7 + +LMDB 0.9.7 Release (2013/08/17) + Don't leave stale lockfile on failed RDONLY open (ITS#7664) + Fix mdb_page_split() ref beyond cursor depth + Fix read txn data race (ITS#7635) + Fix mdb_rebalance (ITS#7536, #7538) + Fix mdb_drop() (ITS#7561) + Misc DEBUG macro fixes + Add MDB_NOTLS envflag + Add mdb_env_copyfd() + Add mdb_txn_env() (ITS#7660) + Add mdb_dbi_flags() (ITS#7661) + Add mdb_env_get_maxkeysize() + Add mdb_env_reader_list()/mdb_env_reader_check() + Add mdb_page_spill/unspill, remove hard txn size limit + Use shorter names for semaphores (ITS#7615) + Build + Fix install target (ITS#7656) + Documentation + Misc updates for cursors, DB handles, data lifetime + +LMDB 0.9.6 Release (2013/02/25) + Many fixes/enhancements + +LMDB 0.9.5 Release (2012/11/30) + Renamed from libmdb to liblmdb + Many fixes/enhancements diff --git a/CHANGES.md b/CHANGES.md index cf6a6787..4f60072e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,57 @@ ## v1.9.0-dev +First release of this PowerDNS/lmdb-go fork. + +- Renamed module from `github.com/bmatsuo/lmdb-go` to `github.com/PowerDNS/lmdb-go` (#3, PR #7) +- Add `go.mod` and fix tests and Travis CI on recent Go versions (#5, PR #6) +- Fix: Cursor.Put would write "\x00" instead of an empty value (#1, PR #2) +- lmdb: Update LMDB C library to version 0.9.28 (#4). 
+ +``` + LMDB 0.9.28 Release (2021/02/04) + ITS#8662 add -a append option to mdb_load + + LMDB 0.9.27 Release (2020/10/26) + ITS#9376 fix repeated DUPSORT cursor deletes + + LMDB 0.9.26 Release (2020/08/11) + ITS#9278 fix robust mutex cleanup for FreeBSD + + LMDB 0.9.25 Release (2020/01/30) + ITS#9068 fix mdb_dump/load backslashes in printable content + ITS#9118 add MAP_NOSYNC for FreeBSD + ITS#9155 free mt_spill_pgs in non-nested txn on end + + LMDB 0.9.24 Release (2019/07/24) + ITS#8969 Tweak mdb_page_split + ITS#8975 WIN32 fix writemap set_mapsize crash + ITS#9007 Fix loose pages in WRITEMAP + + LMDB 0.9.23 Release (2018/12/19) + ITS#8756 Fix loose pages in dirty list + ITS#8831 Fix mdb_load flag init + ITS#8844 Fix mdb_env_close in forked process + Documentation + ITS#8857 mdb_cursor_del doesn't invalidate cursor + ITS#8908 GET_MULTIPLE etc don't change passed in key + + LMDB 0.9.22 Release (2018/03/22) + Fix MDB_DUPSORT alignment bug (ITS#8819) + Fix regression with new db from 0.9.19 (ITS#8760) + Fix liblmdb to build on Solaris (ITS#8612) + Fix delete behavior with DUPSORT DB (ITS#8622) + Fix mdb_cursor_get/mdb_cursor_del behavior (ITS#8722) + + LMDB 0.9.21 Release (2017/06/01) + Fix xcursor after cursor_del (ITS#8622) + + LMDB 0.9.20 (Withdrawn) + Fix mdb_load with escaped plaintext (ITS#8558) + Fix mdb_cursor_last / mdb_put interaction (ITS#8557) +``` + + Changes predating the PowerDNS fork (up to 2017): - Fix unsafe threading behavior in benchmarks (bmatsuo/lmdb-go#101) diff --git a/lmdb/lmdb.h b/lmdb/lmdb.h index 0aeff0d3..bbdc179f 100644 --- a/lmdb/lmdb.h +++ b/lmdb/lmdb.h @@ -135,7 +135,7 @@ * * @author Howard Chu, Symas Corporation. * - * @copyright Copyright 2011-2016 Howard Chu, Symas Corp. All rights reserved. + * @copyright Copyright 2011-2021 Howard Chu, Symas Corp. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted only as authorized by the OpenLDAP @@ -200,7 +200,7 @@ typedef int mdb_filehandle_t; /** Library minor version */ #define MDB_VERSION_MINOR 9 /** Library patch version */ -#define MDB_VERSION_PATCH 19 +#define MDB_VERSION_PATCH 28 /** Combine args a,b,c into a single integer for easy version comparisons */ #define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c)) @@ -210,7 +210,7 @@ typedef int mdb_filehandle_t; MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH) /** The release date of this library version */ -#define MDB_VERSION_DATE "December 28, 2016" +#define MDB_VERSION_DATE "February 4, 2021" /** A stringifier for the version info */ #define MDB_VERSTR(a,b,c,d) "LMDB " #a "." #b "." #c ": (" d ")" @@ -370,7 +370,7 @@ typedef enum MDB_cursor_op { MDB_GET_BOTH, /**< Position at key/data pair. Only for #MDB_DUPSORT */ MDB_GET_BOTH_RANGE, /**< position at key, nearest data. Only for #MDB_DUPSORT */ MDB_GET_CURRENT, /**< Return key/data at current cursor position */ - MDB_GET_MULTIPLE, /**< Return key and up to a page of duplicate data items + MDB_GET_MULTIPLE, /**< Return up to a page of duplicate data items from current cursor position. Move cursor to prepare for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */ MDB_LAST, /**< Position at last key/data item */ @@ -379,7 +379,7 @@ typedef enum MDB_cursor_op { MDB_NEXT, /**< Position at next data item */ MDB_NEXT_DUP, /**< Position at next data item of current key. Only for #MDB_DUPSORT */ - MDB_NEXT_MULTIPLE, /**< Return key and up to a page of duplicate data items + MDB_NEXT_MULTIPLE, /**< Return up to a page of duplicate data items from next cursor position. Move cursor to prepare for #MDB_NEXT_MULTIPLE. 
Only for #MDB_DUPFIXED */ MDB_NEXT_NODUP, /**< Position at first data item of next key */ @@ -390,7 +390,7 @@ typedef enum MDB_cursor_op { MDB_SET, /**< Position at specified key */ MDB_SET_KEY, /**< Position at specified key, return key + data */ MDB_SET_RANGE, /**< Position at first key greater than or equal to specified key. */ - MDB_PREV_MULTIPLE /**< Position at previous page and return key and up to + MDB_PREV_MULTIPLE /**< Position at previous page and return up to a page of duplicate data items. Only for #MDB_DUPFIXED */ } MDB_cursor_op; @@ -1510,6 +1510,10 @@ int mdb_cursor_put(MDB_cursor *cursor, MDB_val *key, MDB_val *data, /** @brief Delete current key/data pair * * This function deletes the key/data pair to which the cursor refers. + * This does not invalidate the cursor, so operations such as MDB_NEXT + * can still be used on it. + * Both MDB_NEXT and MDB_GET_CURRENT will return the same record after + * this operation. * @param[in] cursor A cursor handle returned by #mdb_cursor_open() * @param[in] flags Options for this operation. This parameter * must be set to 0 or one of the values described here. diff --git a/lmdb/mdb.c b/lmdb/mdb.c index 23c1f009..621cbcfa 100644 --- a/lmdb/mdb.c +++ b/lmdb/mdb.c @@ -5,7 +5,7 @@ * BerkeleyDB API, but much simplified. */ /* - * Copyright 2011-2016 Howard Chu, Symas Corp. + * Copyright 2011-2021 Howard Chu, Symas Corp. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -113,6 +113,10 @@ typedef SSIZE_T ssize_t; /* Most platforms have posix_memalign, older may only have memalign */ #define HAVE_MEMALIGN 1 #include <malloc.h> +/* On Solaris, we need the POSIX sigwait function */ +#if defined (__sun) +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif #endif #if !(defined(BYTE_ORDER) || defined(__BYTE_ORDER)) @@ -120,7 +124,10 @@ typedef SSIZE_T ssize_t; #include <resolv.h> /* defines BYTE_ORDER on HPUX and Solaris */ #endif -#if defined(__APPLE__) || defined (BSD) || defined(__FreeBSD_kernel__) +#if defined(__FreeBSD__) && defined(__FreeBSD_version) && __FreeBSD_version >= 1100110 +# define MDB_USE_POSIX_MUTEX 1 +# define MDB_USE_ROBUST 1 +#elif defined(__APPLE__) || defined (BSD) || defined(__FreeBSD_kernel__) # define MDB_USE_POSIX_SEM 1 # define MDB_FDATASYNC fsync #elif defined(ANDROID) @@ -1222,17 +1229,19 @@ typedef struct MDB_xcursor { unsigned char mx_dbflag; } MDB_xcursor; - /** Check if there is an inited xcursor, so #XCURSOR_REFRESH() is proper */ + /** Check if there is an inited xcursor */ #define XCURSOR_INITED(mc) \ ((mc)->mc_xcursor && ((mc)->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) - /** Update sub-page pointer, if any, in \b mc->mc_xcursor. Needed + /** Update the xcursor's sub-page pointer, if any, in \b mc. Needed * when the node which contains the sub-page may have moved. Called - * with \b mp = mc->mc_pg[mc->mc_top], \b ki = mc->mc_ki[mc->mc_top]. + * with leaf page \b mp = mc->mc_pg[\b top]. 
*/ -#define XCURSOR_REFRESH(mc, mp, ki) do { \ +#define XCURSOR_REFRESH(mc, top, mp) do { \ MDB_page *xr_pg = (mp); \ - MDB_node *xr_node = NODEPTR(xr_pg, ki); \ + MDB_node *xr_node; \ + if (!XCURSOR_INITED(mc) || (mc)->mc_ki[top] >= NUMKEYS(xr_pg)) break; \ + xr_node = NODEPTR(xr_pg, (mc)->mc_ki[top]); \ if ((xr_node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) \ (mc)->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(xr_node); \ } while (0) @@ -1369,7 +1378,7 @@ static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, static int mdb_env_read_header(MDB_env *env, MDB_meta *meta); static MDB_meta *mdb_env_pick_meta(const MDB_env *env); static int mdb_env_write_meta(MDB_txn *txn); -#ifdef MDB_USE_POSIX_MUTEX /* Drop unused excl arg */ +#if defined(MDB_USE_POSIX_MUTEX) && !defined(MDB_ROBUST_SUPPORTED) /* Drop unused excl arg */ # define mdb_env_close0(env, excl) mdb_env_close1(env) #endif static void mdb_env_close0(MDB_env *env, int excl); @@ -2485,8 +2494,8 @@ mdb_page_touch(MDB_cursor *mc) if (m2 == mc) continue; if (m2->mc_pg[mc->mc_top] == mp) { m2->mc_pg[mc->mc_top] = np; - if (XCURSOR_INITED(m2) && IS_LEAF(np)) - XCURSOR_REFRESH(m2, np, m2->mc_ki[mc->mc_top]); + if (IS_LEAF(np)) + XCURSOR_REFRESH(m2, mc->mc_top, np); } } } @@ -3021,9 +3030,9 @@ mdb_txn_end(MDB_txn *txn, unsigned mode) txn->mt_parent->mt_flags &= ~MDB_TXN_HAS_CHILD; env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate; mdb_midl_free(txn->mt_free_pgs); - mdb_midl_free(txn->mt_spill_pgs); free(txn->mt_u.dirty_list); } + mdb_midl_free(txn->mt_spill_pgs); mdb_midl_free(pghead); } @@ -3088,10 +3097,41 @@ mdb_freelist_save(MDB_txn *txn) * we may be unable to return them to me_pghead. 
*/ MDB_page *mp = txn->mt_loose_pgs; + MDB_ID2 *dl = txn->mt_u.dirty_list; + unsigned x; if ((rc = mdb_midl_need(&txn->mt_free_pgs, txn->mt_loose_count)) != 0) return rc; - for (; mp; mp = NEXT_LOOSE_PAGE(mp)) + for (; mp; mp = NEXT_LOOSE_PAGE(mp)) { mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno); + /* must also remove from dirty list */ + if (txn->mt_flags & MDB_TXN_WRITEMAP) { + for (x=1; x<=dl[0].mid; x++) + if (dl[x].mid == mp->mp_pgno) + break; + mdb_tassert(txn, x <= dl[0].mid); + } else { + x = mdb_mid2l_search(dl, mp->mp_pgno); + mdb_tassert(txn, dl[x].mid == mp->mp_pgno); + mdb_dpage_free(env, mp); + } + dl[x].mptr = NULL; + } + { + /* squash freed slots out of the dirty list */ + unsigned y; + for (y=1; dl[y].mptr && y <= dl[0].mid; y++); + if (y <= dl[0].mid) { + for(x=y, y++;;) { + while (!dl[y].mptr && y <= dl[0].mid) y++; + if (y > dl[0].mid) break; + dl[x++] = dl[y++]; + } + dl[0].mid = x-1; + } else { + /* all slots freed */ + dl[0].mid = 0; + } + } txn->mt_loose_pgs = NULL; txn->mt_loose_count = 0; } @@ -3954,9 +3994,9 @@ mdb_env_map(MDB_env *env, void *addr) * and won't map more than the file size. * Just set the maxsize right now. 
*/ - if (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo + if (!(flags & MDB_WRITEMAP) && (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo || !SetEndOfFile(env->me_fd) - || SetFilePointer(env->me_fd, 0, NULL, 0) != 0) + || SetFilePointer(env->me_fd, 0, NULL, 0) != 0)) return ErrCode(); } @@ -3973,13 +4013,18 @@ mdb_env_map(MDB_env *env, void *addr) if (rc) return rc; #else + int mmap_flags = MAP_SHARED; int prot = PROT_READ; +#ifdef MAP_NOSYNC /* Used on FreeBSD */ + if (flags & MDB_NOSYNC) + mmap_flags |= MAP_NOSYNC; +#endif if (flags & MDB_WRITEMAP) { prot |= PROT_WRITE; if (ftruncate(env->me_fd, env->me_mapsize) < 0) return ErrCode(); } - env->me_map = mmap(addr, env->me_mapsize, prot, MAP_SHARED, + env->me_map = mmap(addr, env->me_mapsize, prot, mmap_flags, env->me_fd, 0); if (env->me_map == MAP_FAILED) { env->me_map = NULL; @@ -5057,7 +5102,7 @@ mdb_env_close0(MDB_env *env, int excl) if (env->me_fd != INVALID_HANDLE_VALUE) (void) close(env->me_fd); if (env->me_txns) { - MDB_PID_T pid = env->me_pid; + MDB_PID_T pid = getpid(); /* Clearing readers is done in this function because * me_txkey with its destructor must be disabled first. * @@ -5091,6 +5136,17 @@ mdb_env_close0(MDB_env *env, int excl) sem_unlink(env->me_txns->mti_wmname); } } +#elif defined(MDB_ROBUST_SUPPORTED) + /* If we have the filelock: If we are the + * only remaining user, clean up robust + * mutexes. 
+ */ + if (excl == 0) + mdb_env_excl_lock(env, &excl); + if (excl > 0) { + pthread_mutex_destroy(env->me_txns->mti_rmutex); + pthread_mutex_destroy(env->me_txns->mti_wmutex); + } #endif munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo)); } @@ -5459,8 +5515,17 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags) if (flags & (MDB_PS_FIRST|MDB_PS_LAST)) { i = 0; - if (flags & MDB_PS_LAST) + if (flags & MDB_PS_LAST) { i = NUMKEYS(mp) - 1; + /* if already init'd, see if we're already in right place */ + if (mc->mc_flags & C_INITIALIZED) { + if (mc->mc_ki[mc->mc_top] == i) { + mc->mc_top = mc->mc_snum++; + mp = mc->mc_pg[mc->mc_top]; + goto ready; + } + } + } } else { int exact; node = mdb_node_search(mc, key, &exact); @@ -5486,6 +5551,7 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags) if ((rc = mdb_cursor_push(mc, mp))) return rc; +ready: if (flags & MDB_PS_MODIFY) { if ((rc = mdb_page_touch(mc)) != 0) return rc; @@ -5811,15 +5877,20 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) MDB_node *leaf; int rc; - if ((mc->mc_flags & C_EOF) || - ((mc->mc_flags & C_DEL) && op == MDB_NEXT_DUP)) { + if ((mc->mc_flags & C_DEL && op == MDB_NEXT_DUP)) return MDB_NOTFOUND; - } + if (!(mc->mc_flags & C_INITIALIZED)) return mdb_cursor_first(mc, key, data); mp = mc->mc_pg[mc->mc_top]; + if (mc->mc_flags & C_EOF) { + if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mp)-1) + return MDB_NOTFOUND; + mc->mc_flags ^= C_EOF; + } + if (mc->mc_db->md_flags & MDB_DUPSORT) { leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { @@ -5871,16 +5942,12 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { mdb_xcursor_init1(mc, leaf); - } - if (data) { + rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc != MDB_SUCCESS) + return rc; + } else if (data) { if ((rc = mdb_node_read(mc, leaf, data)) 
!= MDB_SUCCESS) return rc; - - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); - if (rc != MDB_SUCCESS) - return rc; - } } MDB_GET_KEY(leaf, key); @@ -5904,7 +5971,8 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) mp = mc->mc_pg[mc->mc_top]; - if (mc->mc_db->md_flags & MDB_DUPSORT) { + if ((mc->mc_db->md_flags & MDB_DUPSORT) && + mc->mc_ki[mc->mc_top] < NUMKEYS(mp)) { leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { if (op == MDB_PREV || op == MDB_PREV_DUP) { @@ -5943,27 +6011,25 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u", mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top])); + if (!IS_LEAF(mp)) + return MDB_CORRUPTED; + if (IS_LEAF2(mp)) { key->mv_size = mc->mc_db->md_pad; key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size); return MDB_SUCCESS; } - mdb_cassert(mc, IS_LEAF(mp)); leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { mdb_xcursor_init1(mc, leaf); - } - if (data) { + rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc != MDB_SUCCESS) + return rc; + } else if (data) { if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) return rc; - - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); - if (rc != MDB_SUCCESS) - return rc; - } } MDB_GET_KEY(leaf, key); @@ -6119,24 +6185,22 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { mdb_xcursor_init1(mc, leaf); - } - if (data) { - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - if (op == MDB_SET || op == MDB_SET_KEY || op == MDB_SET_RANGE) { - rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + if (op == MDB_SET || op == MDB_SET_KEY || op == MDB_SET_RANGE) { + rc = 
mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + } else { + int ex2, *ex2p; + if (op == MDB_GET_BOTH) { + ex2p = &ex2; + ex2 = 0; } else { - int ex2, *ex2p; - if (op == MDB_GET_BOTH) { - ex2p = &ex2; - ex2 = 0; - } else { - ex2p = NULL; - } - rc = mdb_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_SET_RANGE, ex2p); - if (rc != MDB_SUCCESS) - return rc; + ex2p = NULL; } - } else if (op == MDB_GET_BOTH || op == MDB_GET_BOTH_RANGE) { + rc = mdb_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_SET_RANGE, ex2p); + if (rc != MDB_SUCCESS) + return rc; + } + } else if (data) { + if (op == MDB_GET_BOTH || op == MDB_GET_BOTH_RANGE) { MDB_val olddata; MDB_cmp_func *dcmp; if ((rc = mdb_node_read(mc, leaf, &olddata)) != MDB_SUCCESS) @@ -6194,22 +6258,23 @@ mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data) mc->mc_ki[mc->mc_top] = 0; if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { - key->mv_size = mc->mc_db->md_pad; - key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], 0, key->mv_size); + if ( key ) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], 0, key->mv_size); + } return MDB_SUCCESS; } - if (data) { - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - mdb_xcursor_init1(mc, leaf); - rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); - if (rc) - return rc; - } else { - if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) - return rc; - } + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc) + return rc; + } else if (data) { + if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) + return rc; } + MDB_GET_KEY(leaf, key); return MDB_SUCCESS; } @@ -6224,36 +6289,33 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) if (mc->mc_xcursor) mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); - if (!(mc->mc_flags & C_EOF)) { - - if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { - rc = 
mdb_page_search(mc, NULL, MDB_PS_LAST); - if (rc != MDB_SUCCESS) - return rc; - } - mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); - + if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { + rc = mdb_page_search(mc, NULL, MDB_PS_LAST); + if (rc != MDB_SUCCESS) + return rc; } + mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top])); + mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1; mc->mc_flags |= C_INITIALIZED|C_EOF; leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { - key->mv_size = mc->mc_db->md_pad; - key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], key->mv_size); + if (key) { + key->mv_size = mc->mc_db->md_pad; + key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], key->mv_size); + } return MDB_SUCCESS; } - if (data) { - if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - mdb_xcursor_init1(mc, leaf); - rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); - if (rc) - return rc; - } else { - if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) - return rc; - } + if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { + mdb_xcursor_init1(mc, leaf); + rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL); + if (rc) + return rc; + } else if (data) { + if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS) + return rc; } MDB_GET_KEY(leaf, key); @@ -6410,6 +6472,11 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, rc = MDB_INCOMPATIBLE; break; } + if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mc->mc_pg[mc->mc_top])) { + mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]); + rc = MDB_NOTFOUND; + break; + } { MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { @@ -6530,7 +6597,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, dkey.mv_size = 0; - if (flags == MDB_CURRENT) { + if (flags & MDB_CURRENT) { if (!(mc->mc_flags & C_INITIALIZED)) return EINVAL; rc = MDB_SUCCESS; @@ -6726,7 +6793,7 @@ 
mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, offset *= 4; /* space for 4 more */ break; } - /* FALLTHRU: Big enough MDB_DUPFIXED sub-page */ + /* FALLTHRU */ /* Big enough MDB_DUPFIXED sub-page */ case MDB_CURRENT: fp->mp_flags |= P_DIRTY; COPY_PGNO(fp->mp_pgno, mp->mp_pgno); @@ -6776,8 +6843,9 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, } else { memcpy((char *)mp + mp->mp_upper + PAGEBASE, (char *)fp + fp->mp_upper + PAGEBASE, olddata.mv_size - fp->mp_upper - PAGEBASE); + memcpy((char *)(&mp->mp_ptrs), (char *)(&fp->mp_ptrs), NUMKEYS(fp) * sizeof(mp->mp_ptrs[0])); for (i=0; i<NUMKEYS(fp); i++) - mp->mp_ptrs[i] = fp->mp_ptrs[i] + offset; + mp->mp_ptrs[i] += offset; } } @@ -6906,8 +6974,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, if (m3->mc_ki[i] >= mc->mc_ki[i] && insert_key) { m3->mc_ki[i]++; } - if (XCURSOR_INITED(m3)) - XCURSOR_REFRESH(m3, mp, m3->mc_ki[i]); + XCURSOR_REFRESH(m3, i, mp); } } } @@ -6925,7 +6992,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, xdata.mv_size = 0; xdata.mv_data = ""; leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); - if (flags & MDB_CURRENT) { + if (flags == MDB_CURRENT) { xflags = MDB_CURRENT|MDB_NOSPILL; } else { mdb_xcursor_init1(mc, leaf); @@ -6949,7 +7016,6 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_xcursor *mx = mc->mc_xcursor; unsigned i = mc->mc_top; MDB_page *mp = mc->mc_pg[i]; - int nkeys = NUMKEYS(mp); for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2=m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; @@ -6957,8 +7023,8 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, if (m2->mc_pg[i] == mp) { if (m2->mc_ki[i] == mc->mc_ki[i]) { mdb_xcursor_init2(m2, mx, new_dupdata); - } else if (!insert_key && m2->mc_ki[i] < nkeys) { - XCURSOR_REFRESH(m2, mp, m2->mc_ki[i]); + } else if (!insert_key) { + XCURSOR_REFRESH(m2, i, mp); } } } @@ -7030,6 +7096,8 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags) return rc; mp 
= mc->mc_pg[mc->mc_top]; + if (!IS_LEAF(mp)) + return MDB_CORRUPTED; if (IS_LEAF2(mp)) goto del_key; leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]); @@ -7063,12 +7131,7 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags) if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (!(m2->mc_flags & C_INITIALIZED)) continue; if (m2->mc_pg[mc->mc_top] == mp) { - MDB_node *n2 = leaf; - if (m2->mc_ki[mc->mc_top] != mc->mc_ki[mc->mc_top]) { - n2 = NODEPTR(mp, m2->mc_ki[mc->mc_top]); - if (n2->mn_flags & F_SUBDATA) continue; - } - m2->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(n2); + XCURSOR_REFRESH(m2, mc->mc_top, mp); } } } @@ -7644,9 +7707,15 @@ mdb_cursor_count(MDB_cursor *mc, size_t *countp) if (!(mc->mc_flags & C_INITIALIZED)) return EINVAL; - if (!mc->mc_snum || (mc->mc_flags & C_EOF)) + if (!mc->mc_snum) return MDB_NOTFOUND; + if (mc->mc_flags & C_EOF) { + if (mc->mc_ki[mc->mc_top] >= NUMKEYS(mc->mc_pg[mc->mc_top])) + return MDB_NOTFOUND; + mc->mc_flags ^= C_EOF; + } + leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) { *countp = 1; @@ -7908,8 +7977,8 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) m3->mc_ki[csrc->mc_top] = cdst->mc_ki[cdst->mc_top]; m3->mc_ki[csrc->mc_top-1]++; } - if (XCURSOR_INITED(m3) && IS_LEAF(mps)) - XCURSOR_REFRESH(m3, m3->mc_pg[csrc->mc_top], m3->mc_ki[csrc->mc_top]); + if (IS_LEAF(mps)) + XCURSOR_REFRESH(m3, csrc->mc_top, m3->mc_pg[csrc->mc_top]); } } else /* Adding on the right, bump others down */ @@ -7930,8 +7999,8 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft) } else { m3->mc_ki[csrc->mc_top]--; } - if (XCURSOR_INITED(m3) && IS_LEAF(mps)) - XCURSOR_REFRESH(m3, m3->mc_pg[csrc->mc_top], m3->mc_ki[csrc->mc_top]); + if (IS_LEAF(mps)) + XCURSOR_REFRESH(m3, csrc->mc_top, m3->mc_pg[csrc->mc_top]); } } } @@ -8132,8 +8201,8 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst) m3->mc_ki[top-1] > csrc->mc_ki[top-1]) { m3->mc_ki[top-1]--; } - if 
(XCURSOR_INITED(m3) && IS_LEAF(psrc)) - XCURSOR_REFRESH(m3, m3->mc_pg[top], m3->mc_ki[top]); + if (IS_LEAF(psrc)) + XCURSOR_REFRESH(m3, top, m3->mc_pg[top]); } } { @@ -8395,61 +8464,75 @@ mdb_cursor_del0(MDB_cursor *mc) } else if (m3->mc_ki[mc->mc_top] > ki) { m3->mc_ki[mc->mc_top]--; } - if (XCURSOR_INITED(m3)) - XCURSOR_REFRESH(m3, m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]); + XCURSOR_REFRESH(m3, mc->mc_top, mp); } } } rc = mdb_rebalance(mc); + if (rc) + goto fail; - if (rc == MDB_SUCCESS) { - /* DB is totally empty now, just bail out. - * Other cursors adjustments were already done - * by mdb_rebalance and aren't needed here. - */ - if (!mc->mc_snum) - return rc; + /* DB is totally empty now, just bail out. + * Other cursors adjustments were already done + * by mdb_rebalance and aren't needed here. + */ + if (!mc->mc_snum) { + mc->mc_flags |= C_EOF; + return rc; + } - mp = mc->mc_pg[mc->mc_top]; - nkeys = NUMKEYS(mp); + ki = mc->mc_ki[mc->mc_top]; + mp = mc->mc_pg[mc->mc_top]; + nkeys = NUMKEYS(mp); - /* Adjust other cursors pointing to mp */ - for (m2 = mc->mc_txn->mt_cursors[dbi]; !rc && m2; m2=m2->mc_next) { - m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; - if (! (m2->mc_flags & m3->mc_flags & C_INITIALIZED)) - continue; - if (m3->mc_snum < mc->mc_snum) - continue; - if (m3->mc_pg[mc->mc_top] == mp) { - /* if m3 points past last node in page, find next sibling */ - if (m3->mc_ki[mc->mc_top] >= mc->mc_ki[mc->mc_top]) { - if (m3->mc_ki[mc->mc_top] >= nkeys) { - rc = mdb_cursor_sibling(m3, 1); - if (rc == MDB_NOTFOUND) { - m3->mc_flags |= C_EOF; - rc = MDB_SUCCESS; - continue; - } - } - if (mc->mc_db->md_flags & MDB_DUPSORT) { - MDB_node *node = NODEPTR(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]); - /* If this node is a fake page, it needs to be reinited - * because its data has moved. But just reset mc_pg[0] - * if the xcursor is already live. 
- */ - if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA) { - if (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) + /* Adjust other cursors pointing to mp */ + for (m2 = mc->mc_txn->mt_cursors[dbi]; !rc && m2; m2=m2->mc_next) { + m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; + if (!(m2->mc_flags & m3->mc_flags & C_INITIALIZED)) + continue; + if (m3->mc_snum < mc->mc_snum) + continue; + if (m3->mc_pg[mc->mc_top] == mp) { + /* if m3 points past last node in page, find next sibling */ + if (m3->mc_ki[mc->mc_top] >= nkeys) { + rc = mdb_cursor_sibling(m3, 1); + if (rc == MDB_NOTFOUND) { + m3->mc_flags |= C_EOF; + rc = MDB_SUCCESS; + continue; + } + if (rc) + goto fail; + } + if (m3->mc_ki[mc->mc_top] >= ki || + /* moved to right sibling */ m3->mc_pg[mc->mc_top] != mp) { + if (m3->mc_xcursor && !(m3->mc_flags & C_EOF)) { + MDB_node *node = NODEPTR(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]); + /* If this node has dupdata, it may need to be reinited + * because its data has moved. + * If the xcursor was not initd it must be reinited. + * Else if node points to a subDB, nothing is needed. + * Else (xcursor was initd, not a subDB) needs mc_pg[0] reset. + */ + if (node->mn_flags & F_DUPDATA) { + if (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { + if (!(node->mn_flags & F_SUBDATA)) m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node); - else - mdb_xcursor_init1(m3, node); + } else { + mdb_xcursor_init1(m3, node); + rc = mdb_cursor_first(&m3->mc_xcursor->mx_cursor, NULL, NULL); + if (rc) + goto fail; } } + m3->mc_xcursor->mx_cursor.mc_flags |= C_DEL; } + m3->mc_flags |= C_DEL; } } - mc->mc_flags |= C_DEL; } +fail: if (rc) mc->mc_txn->mt_flags |= MDB_TXN_ERROR; return rc; @@ -8691,7 +8774,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno * the split so the new page is emptier than the old page. * This yields better packing during sequential inserts. 
*/ - if (nkeys < 20 || nsize > pmax/16 || newindx >= nkeys) { + if (nkeys < 32 || nsize > pmax/16 || newindx >= nkeys) { /* Find split point */ psize = 0; if (newindx <= split_indx || newindx >= nkeys) { @@ -8927,8 +9010,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno m3->mc_ki[ptop] >= mc->mc_ki[ptop]) { m3->mc_ki[ptop]++; } - if (XCURSOR_INITED(m3) && IS_LEAF(mp)) - XCURSOR_REFRESH(m3, m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]); + if (IS_LEAF(mp)) + XCURSOR_REFRESH(m3, mc->mc_top, m3->mc_pg[mc->mc_top]); } } DPRINTF(("mp left: %d, rp left: %d", SIZELEFT(mp), SIZELEFT(rp))); @@ -9733,8 +9816,11 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]); if ((node->mn_flags & (F_DUPDATA|F_SUBDATA)) != F_SUBDATA) return MDB_INCOMPATIBLE; - } else if (! (rc == MDB_NOTFOUND && (flags & MDB_CREATE))) { - return rc; + } else { + if (rc != MDB_NOTFOUND || !(flags & MDB_CREATE)) + return rc; + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) + return EACCES; } /* Done here so we cannot fail after creating a new DB */ From c6b9a86987b668bced2054ae2a9d89ab94773b9f Mon Sep 17 00:00:00 2001 From: Konrad Wojas Date: Tue, 20 Apr 2021 15:32:47 +0800 Subject: [PATCH 3/3] Ignore /tmp/ --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 545c899c..73255d28 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ /lmdb/openldap /bin/ +/tmp/ # Test databases and benchmark data testdata/ @@ -29,3 +30,4 @@ _cgo_export.* _testmain.go *.exe +