From brong at fastmailteam.com Fri Sep 4 11:57:41 2020 From: brong at fastmailteam.com (Bron Gondwana) Date: Fri, 04 Sep 2020 20:57:41 +1000 Subject: Glass cursor crash on empty table when doing Honey compact Message-ID: Hi, This is a fun one! I've been playing with the following pattern: * multiple read-only databases being compacted into one. * combined metadata to add to the destination database which needs to be calculated (language counts and indexer versions) This is in cyrus imapd. Existing code is here: https://github.com/cyrusimap/cyrus-imapd/blob/4376a87773f709516105b84e3cf12b7d55775a55/imap/xapian_wrap.cpp#L522 It does the compact, then opens the target database to fiddle the metadata. Obviously, this isn't allowed with Honey, so I decided to do the following instead: * create a new WritableDatabase and write in the metadata I want with a magic prefix on it. * update the Xapian::Compactor subclass with the magic to pick the value with the prefix and return that (including empty to remove it). That code is here: https://github.com/cyrusimap/cyrus-imapd/commit/5ff26ec64197efa568c56fc2189f3f05647b8db2 It works fine still when writing to Glass, but is crashes when writing to Honey: Program received signal SIGSEGV, Segmentation fault. Glass::LeafItem_wr::form_key (key_="", this=0x5555555d9308) at backends/glass/glass_table.h:255 255 set_key_len(key_len); (gdb) bt #0 Glass::LeafItem_wr::form_key (key_="", this=0x5555555d9308) at backends/glass/glass_table.h:255 #1 GlassTable::form_key (this=this at entry=0x5555555d92d8, key="") at backends/glass/glass_table.cc:1250 #2 0x00007ffff733bfdb in GlassCursor::find_entry_ge (this=this at entry=0x55555558fea0, key="") at backends/glass/glass_cursor.cc:257 #3 0x00007ffff73810e8 in GlassCursor::rewind (this=0x55555558fea0) at /usr/include/c++/9/bits/char_traits.h:300 #4 HoneyCompact::PositionCursor::PositionCursor (offset_=, in=0x5555555d92d8, this=0x55555558fea0) at backends/honey/honey_compact.cc:1662 #5 HoneyCompact::merge_positions (out=0x5555555b31c0, inputs=std::vector of length 2, capacity 2 = {...}, offset=std::vector of length 2, capacity 2 = {...}) at backends/honey/honey_compact.cc:1747 #6 0x00007ffff737367b in HoneyDatabase::compact (compactor=0x7ffff7fc8828 , destdir=0x5555555905e0 "/tmpfs/cas/1027490101/search2/c/user/cassandane/xapian.NEW", fd=-1, source_backend=, sources=std::vector of length 2, capacity 2 = {...}, offset=std::vector of length 2, capacity 2 = {...}, compaction=Xapian::Compactor::FULLER, flags=, last_docid=) at backends/honey/honey_compact.cc:2325 #7 0x00007ffff72ac649 in Xapian::Database::compact_ (this=this at entry=0x7fffffffddf0, output_ptr=output_ptr at entry=0x7fffffffdf60, fd=fd at entry=0, flags=, flags at entry=1290, block_size=block_size at entry=0, compactor=compactor at entry=0x7ffff7fc8828 ) at /usr/include/c++/9/bits/basic_string.h:2300 #8 0x00007ffff7f6884a in Xapian::Database::compact (compactor=..., block_size=0, flags=1290, output="/tmpfs/cas/1027490101/search2/c/user/cassandane/xapian.NEW", this=0x7fffffffddf0) at /usr/local/cyruslibs-v33/include/xapian-1.5/xapian/database.h:818 The issue is, 'p' is NULL: (gdb) p in $4 = (const GlassTable *) 0x5555555d92d8 (gdb) p *in $5 = {tablename = 0x7ffff74b33e9 "position", revision_number = 1, item_count = 0, block_size = 8192, flags = -1, faked_root_block = true, sequential = true, handle = -1, level = 0, root = 0, kt = {> = {p = 0x0}, }, buffer = 0x0, free_list = {revision = 0, first_unused_block = 0, fl = {n = 0, c = 0}, fl_end = {n = 0, c = 0}, flw = {n = 0, c = 0}, flw_appending = false, p = 0x0, pw = 0x0}, name = "/tmpfs/cas/1027490101/search2/c/user/cassandane/xapian.NEW.META/position .", seq_count = 0, changed_n = 0, changed_c = 0, max_item_size = 0, Btree_modified = false, full_compaction = false, writable = false, cursor_created_since_last_modification = true, cursor_version = 0, changes_obj = 0x0, C = {{ data = 0x0, c = -1, rewrite = false}, {data = 0x0, c = -1, rewrite = false}, {data = 0x0, c = -1, rewrite = false}, {data = 0x0, c = -1, rewrite = false}, { data = 0x0, c = -1, rewrite = false}, {data = 0x0, c = -1, rewrite = false}, {data = 0x0, c = -1, rewrite = false}, {data = 0x0, c = -1, rewrite = false}, { data = 0x0, c = -1, rewrite = false}, {data = 0x0, c = -1, rewrite = false}}, split_p = 0x0, compress_min = 0, comp_stream = {compress_strategy = 0, out_len = 0, out = 0x0, deflate_zstream = 0x0, inflate_zstream = 0x0}, lazy = true, last_readahead = 4294967295, offset = 0} Because the position table is empty! I'm not sure why 'kt' isn't being initialised. I patched around it on my testbed as follows: diff --git a/xapian-core/backends/glass/glass_cursor.cc b/xapian-core/backends/glass/glass_cursor.cc index c20ff1ab04f8..07c2a58cb7c8 100644 --- a/xapian-core/backends/glass/glass_cursor.cc +++ b/xapian-core/backends/glass/glass_cursor.cc @@ -106,6 +106,10 @@ bool GlassCursor::next() { LOGCALL(DB, bool, "GlassCursor::next", NO_ARGS); + if (B->empty()) { + is_after_end = true; + RETURN(false); + } Assert(!is_after_end); if (B->cursor_version != version) { // find_entry() will call rebuild(). @@ -144,6 +148,10 @@ bool GlassCursor::find_entry(const string &key) { LOGCALL(DB, bool, "GlassCursor::find_entry", key); + if (B->empty()) { + is_after_end = true; + RETURN(false); + } if (B->cursor_version != version) { rebuild(); } @@ -190,6 +198,10 @@ void GlassCursor::find_entry_lt(const string &key) { LOGCALL_VOID(DB, "GlassCursor::find_entry_lt", key); + if (B->empty()) { + is_after_end = true; + return; + } if (!find_entry(key)) { // The entry wasn't found, so find_entry() left us on the entry before // the one we asked for and we're done. @@ -213,6 +225,10 @@ bool GlassCursor::find_exact(const string &key) { LOGCALL(DB, bool, "GlassCursor::find_exact", key); + if (B->empty()) { + is_after_end = true; + RETURN(false); + } is_after_end = false; is_positioned = false; if (rare(key.size() > GLASS_BTREE_MAX_KEY_LEN)) { @@ -238,6 +254,10 @@ bool GlassCursor::find_entry_ge(const string &key) { LOGCALL(DB, bool, "GlassCursor::find_entry_ge", key); + if (B->empty()) { + is_after_end = true; + RETURN(false); + } if (B->cursor_version != version) { rebuild(); } But I'm not sure if the right approach is to patch the cursor to not try and build a key if there's no data, or if kt should be initialised. Or indeed, if the Honey compactor should check for empty tables and just skip them without trying to create a cursor! What I think I would really like is an API to pass a hash of key/value metadata items into db.compact rather than having to do this workaround, but either way it's bad if we crash on an empty database! Cheers, Bron. -- Bron Gondwana, CEO, Fastmail Pty Ltd brong at fastmailteam.com -------------- next part -------------- An HTML attachment was scrubbed... URL: From olly at survex.com Mon Sep 7 01:42:03 2020 From: olly at survex.com (Olly Betts) Date: Mon, 7 Sep 2020 01:42:03 +0100 Subject: Glass cursor crash on empty table when doing Honey compact In-Reply-To: References: Message-ID: <20200907004203.GH18037@survex.com> On Fri, Sep 04, 2020 at 08:57:41PM +1000, Bron Gondwana wrote: > Because the position table is empty! I'm not sure why 'kt' isn't > being initialised. The buffer inside the kt is allocated by basic_open(), which only gets called if that table actually exists on disk, or when it's first created. > But I'm not sure if the right approach is to patch the cursor to not > try and build a key if there's no data, or if kt should be > initialised. > > Or indeed, if the Honey compactor should check for empty tables and > just skip them without trying to create a cursor! That's what the glass compaction code does, which is why this doesn't affect it - that seems a reasonable way to fix this. I want to dig into this a bit more to make sure we aren't just avoiding a latent problem that could bite us in other situations though. > What I think I would really like is an API to pass a hash of key/value > metadata items into db.compact rather than having to do this > workaround, but either way it's bad if we crash on an empty database! It couldn't really be a hash, since we need access to keys in ascending order. But once honey supports update this wouldn't be needed, so I think creating a temporary database is a reasonable workaround for now. Cheers, Olly From anonymous.maarten at gmail.com Thu Sep 17 13:45:04 2020 From: anonymous.maarten at gmail.com (anonymous.maarten at gmail.com) Date: Thu, 17 Sep 2020 12:45:04 -0000 Subject: [PATCH] Add ability to link to static xapian library in generated xapian-config.cmake Message-ID: <20200917124435.27965-1-anonymous.maarten@gmail.com> From: Anonymous Maarten Hello! The generated xapian-config.cmake does only support shared xapian libraries. The patch below allows using a static/shared xapian library in a cmake project. This patch was created to allow linking doxygen to a static xapian. See https://github.com/doxygen/doxygen/issues/8038 for more details. Kind regards Maarten --- xapian-core/cmake/xapian-config.cmake.in | 13 +++++++++++-- xapian-core/configure.ac | 18 +++++++++++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/xapian-core/cmake/xapian-config.cmake.in b/xapian-core/cmake/xapian-config.cmake.in index e2222aee4285..cc634168b829 100644 --- a/xapian-core/cmake/xapian-config.cmake.in +++ b/xapian-core/cmake/xapian-config.cmake.in @@ -1,5 +1,14 @@ SET(prefix "@prefix@") SET(exec_prefix "@exec_prefix@") -SET(XAPIAN_LIBRARIES "@libdir@/libxapian at LIBRARY_VERSION_SUFFIX@@SHLIBEXT@" CACHE FILEPATH "Libraries for Xapian") +SET(XAPIAN_SHARED @XAPIAN_CMAKE_SHARED@ CACHE BOOL "Use shared xapian library") +SET(XAPIAN_SHARED_LIBRARY "@libdir@/libxapian at LIBRARY_VERSION_SUFFIX@@SHLIBEXT@" CACHE FILEPATH "Shared Xapian library") +SET(XAPIAN_STATIC_LIBRARY "@libdir@/libxapian at LIBRARY_VERSION_SUFFIX@.a" CACHE FILEPATH "Static Xapian library") +SET(XAPIAN_STATIC_DEP_LIBS "@XAPIAN_CMAKE_LIBS@" CACHE STRING "Dependencies of static xapian library") +IF(XAPIAN_SHARED) + SET(XAPIAN_LIBRARIES "${XAPIAN_SHARED_LIBRARY}") +ELSE() + FIND_PACKAGE(ZLIB REQUIRED) + SET(XAPIAN_LIBRARIES "${XAPIAN_STATIC_LIBRARY};${XAPIAN_STATIC_DEP_LIBS};${ZLIB_LIBRARIES}") +ENDIF() SET(XAPIAN_INCLUDE_DIR "@incdir@" CACHE PATH "Include path for Xapian") -SET(XAPIAN_FOUND "TRUE") +SET(XAPIAN_FOUND ON) diff --git a/xapian-core/configure.ac b/xapian-core/configure.ac index 5e83c87ffef0..357996e299fd 100644 --- a/xapian-core/configure.ac +++ b/xapian-core/configure.ac @@ -279,10 +279,21 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [[ #endif ]])], [msvc=no], [msvc=yes]) +XAPIAN_CMAKE_SHARED= +AC_SUBST([XAPIAN_CMAKE_SHARED]) + +if test "$enable_shared" = yes; then + XAPIAN_CMAKE_SHARED=ON +else + XAPIAN_CMAKE_SHARED=OFF +fi + XAPIAN_LDFLAGS= XAPIAN_LIBS= +XAPIAN_CMAKE_LIBS= AC_SUBST([XAPIAN_LDFLAGS]) AC_SUBST([XAPIAN_LIBS]) +AC_SUBST([XAPIAN_CMAKE_LIBS]) AC_DEFUN([XAPIAN_TEST_FLAGS_], [ @@ -504,6 +515,7 @@ AC_SEARCH_LIBS([timer_create], [rt], #endif]])], [AC_MSG_RESULT([yes]) XAPIAN_LIBS="$LIBS $XAPIAN_LIBS" + XAPIAN_CMAKE_LIBS=";rt$XAPIAN_CMAKE_LIBS" AC_DEFINE([HAVE_TIMER_CREATE], [1], [Define to 1 if you have the 'timer_create' function.])] , [AC_MSG_RESULT([no]) @@ -893,6 +905,7 @@ case $enable_backend_glass$enable_backend_honey in ]) if test x != x"$LIBS" ; then XAPIAN_LIBS="$XAPIAN_LIBS $LIBS" + dnl CMake finds zlib itself using FindZLIB.cmake fi LIBS=$SAVE_LIBS @@ -908,6 +921,7 @@ case $enable_backend_glass$enable_backend_honey in win32_need_lws2_32=1 fi XAPIAN_LIBS="$XAPIAN_LIBS -lrpcrt4" + XAPIAN_CMAKE_LIBS="$XAPIAN_CMAKE_LIBS;rpcrt4" ;; *) dnl Check for uuid/uuid.h (e2fsprogs/util-linux-ng) or uuid.h @@ -921,6 +935,7 @@ case $enable_backend_glass$enable_backend_honey in ]) if test x != x"$LIBS" ; then XAPIAN_LIBS="$XAPIAN_LIBS $LIBS" + XAPIAN_CMAKE_LIBS="$XAPIAN_CMAKE_LIBS;uuid" fi ], [ dnl Try uuid.h as found on FreeBSD/NetBSD/OpenBSD/AIX, with associated @@ -1019,6 +1034,7 @@ fi if test "$win32_need_lws2_32" = 1 ; then XAPIAN_LIBS="$XAPIAN_LIBS -lws2_32" + XAPIAN_CMAKE_LIBS="$XAPIAN_CMAKE_LIBS;ws2_32" fi AC_ARG_ENABLE([visibility], @@ -1059,7 +1075,7 @@ dnl We need to actually check for a declaration as OS X has a dummy dnl implementation in the library which is not prototyped in any header. AC_CHECK_DECL([fdatasync(int)], [ SAVE_LIBS=$LIBS - AC_SEARCH_LIBS([fdatasync], [rt], [XAPIAN_LIBS="$LIBS $XAPIAN_LIBS"]) + AC_SEARCH_LIBS([fdatasync], [rt], [XAPIAN_LIBS="$LIBS $XAPIAN_LIBS"; XAPIAN_CMAKE_LIBS=";rt;$XAPIAN_CMAKE_LIBS"]) LIBS=$SAVE_LIBS AC_CHECK_FUNCS([fdatasync]) ], -- 2.21.3