sorting large msets

Eric Wong e at 80x24.org
Fri Mar 30 18:21:43 BST 2018


Hello, is there a way to optimize sorting by certain values
for queries which return a huge number of results?

For example, I just want a simple query that gives me the 200
most recent emails out of millions.  The elapsed time for
get_mset increases as the number of matching documents
(roughly $n * 2000) increases.

I suppose I could store a pre-sorted set using SQLite or
similar.  Thanks in advance for any advice/help you can provide.

-----------8<--------
#!/usr/bin/perl -w
use strict;
use warnings;
use Search::Xapian::Document;
use Search::Xapian qw/:standard/;
use Search::Xapian::WritableDatabase;
use File::Temp qw(tempdir);
use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);

# Benchmark: time a single get_mset() call that asks for the first 200
# hits of a value-sorted query over a freshly built scratch database.

# Scratch database directory under the system temp dir; CLEANUP => 1 asks
# File::Temp to remove it when the program exits.
my $db_dir = tempdir('xapian-test-XXXXXXX', CLEANUP => 1, TMPDIR => 1);
my $open_mode = Search::Xapian::DB_CREATE_OR_OPEN;
my $db = Search::Xapian::WritableDatabase->new($db_dir, $open_mode);

# Batch count comes from the first command-line argument (default 100).
my $batches = shift(@ARGV) || 100;

# Populate the database, one transaction per batch.  Both ranges are
# inclusive, so there are ($batches + 1) * 2001 ($batch, $seq) pairs.
# Each pair yields two documents with identical sort value and data:
# one carrying the term "Tmail" (the term queried below) and one carrying
# "Tghost" (not part of the query).
# Note: because $seq runs up to 2000 while batches are spaced 1000 apart,
# sort values of neighbouring batches overlap.
foreach my $batch (0 .. $batches) {
	$db->begin_transaction;
	foreach my $seq (0 .. 2000) {
		my $sort_key =
			Search::Xapian::sortable_serialise(($batch * 1000) + $seq);
		foreach my $kind (qw(mail ghost)) {
			my $doc = Search::Xapian::Document->new;
			$doc->add_value(0, $sort_key);	# value slot 0 is the sort key
			$doc->set_data("$batch $seq");
			$doc->add_boolean_term('T' . $kind);
			$db->add_document($doc);
		}
	}
	$db->commit_transaction;
}

# Query for the "Tmail" documents only, ordered by value slot 0.  The
# second argument (1) is the API's "reverse" flag.
my $enquire = Search::Xapian::Enquire->new($db);
my $mail_query = Search::Xapian::Query->new('T' . 'mail');
$enquire->set_query($mail_query);
$enquire->set_sort_by_value_then_relevance(0, 1);

# Time only the get_mset() call, using the monotonic clock so wall-clock
# adjustments cannot skew the measurement.
my ($first, $max_hits) = (0, 200);
my $started = clock_gettime(CLOCK_MONOTONIC);
my $mset = $enquire->get_mset($first, $max_hits);
my $finished = clock_gettime(CLOCK_MONOTONIC);
my $elapsed = $finished - $started;

# Drop our handles before reporting the elapsed time in seconds.
undef $db;
undef $db_dir;
print $elapsed, "\n";
__END__



More information about the Xapian-discuss mailing list