sorting large msets
Eric Wong
e at 80x24.org
Fri Mar 30 18:21:43 BST 2018
Hello, is there a way to optimize sorting by certain values
for queries which return a huge number of results?
For example, I just want a simple query that gives me the 200
most recent emails out of millions. The elapsed time for
get_mset increases as the number of matching documents
(roughly $n * 2000) increases.
I suppose I could store a pre-sorted set using SQLite or
similar. Thanks in advance for any advice/help you can provide.
-----------8<--------
#!/usr/bin/perl -w
use strict;
use warnings;
use Search::Xapian::Document;
use Search::Xapian qw/:standard/;
use Search::Xapian::WritableDatabase;
use File::Temp qw(tempdir);
use Time::HiRes qw(clock_gettime CLOCK_MONOTONIC);
# Scratch directory for the test database.  File::Temp is asked to create it
# under the system temp dir (TMPDIR) and to remove it at exit (CLEANUP).
my $tmp = tempdir('xapian-test-XXXXXXX', TMPDIR => 1, CLEANUP => 1);

# Create the database in that directory, or open it if one is already there.
my $xdb = Search::Xapian::WritableDatabase->new(
    $tmp,
    Search::Xapian::DB_CREATE_OR_OPEN(),
);

# Batch count: first command-line argument, falling back to 100 when the
# argument is missing, empty or zero.
my $n = shift(@ARGV) || 100;
# Populate the index.  Both ranges are inclusive, so this runs $n + 1 batches
# of 2001 steps each; every batch is wrapped in its own transaction.
for my $batch (0 .. $n) {
    $xdb->begin_transaction;

    for my $seq (0 .. 2000) {
        # Sort key stored in value slot 0; encoded once and shared by both
        # documents created in this step.
        my $sort_key = Search::Xapian::sortable_serialise($batch * 1000 + $seq);

        # Two documents per step, identical except for the boolean term
        # ('Tmail' first, then 'Tghost').
        for my $kind ('mail', 'ghost') {
            my $entry = Search::Xapian::Document->new;
            $entry->add_value(0, $sort_key);
            $entry->set_data("$batch $seq");
            $entry->add_boolean_term('T' . $kind);
            $xdb->add_document($entry);
        }
    }

    $xdb->commit_transaction;
}
# Result window: the first 200 matches.
my $first     = 0;
my $page_size = 200;

# Query for documents carrying the 'Tmail' term, sorted on value slot 0 and
# then by relevance.  The second argument (1) is passed as the reverse flag.
my $wanted   = Search::Xapian::Query->new('T' . 'mail');
my $searcher = Search::Xapian::Enquire->new($xdb);
$searcher->set_query($wanted);
$searcher->set_sort_by_value_then_relevance(0, 1);

# Only the get_mset call sits between the two monotonic clock readings.
my $started  = clock_gettime(CLOCK_MONOTONIC);
my $hits     = $searcher->get_mset($first, $page_size);
my $finished = clock_gettime(CLOCK_MONOTONIC);
my $seconds  = $finished - $started;

# Drop the database handle and the directory name before reporting.
undef $xdb;
undef $tmp;

# Elapsed seconds for the single get_mset call.
print $seconds, "\n";
__END__
More information about the Xapian-discuss
mailing list