[Xapian-discuss] Get term from document by position
john.alveris at Safe-mail.net
john.alveris at Safe-mail.net
Sun Jul 26 19:39:16 BST 2015
mple (see attachment).
>
> Attachments get stripped out by the mailing list, so I’ve made a private gist of the two files here: <https://gist.github.com/jaylett/ce8455b37e2b84422346>.
>
> Actually, when I run it I get 0 matches, which would explain why you’re just getting the start of the document. However if I adjust things (match the stemming strategy for TermGenerator to that for QueryParser), it still gives me the opening rather than a useful snippet.
Sorry, my mistake. The modified test.cpp file should be this (I just added
indexer.set_stemming_strategy(Xapian::TermGenerator::STEM_ALL_Z) at line 34):
============= Beginning of the modified test.cpp file =========
#include <xapian.h>
#include <iostream>
#include <string>
#include <cstdlib> // For exit().
#include <cstring>
#include <fstream>
// Holds the text that main() indexes and later passes to the snippet
// generator.
class MyText {
public:
    // Fills text_str; the definition lives further down in this file.
    void set_string();

    // The loaded text. Public so callers can read it directly.
    std::string text_str;
};
// Path of the Xapian database that main() creates (or overwrites) and then
// reopens for searching.
std::string database_dir("db_dir");

// Query text that main() hands to the query parser.
std::string query_string("extracellular microbe");
int
main(int argc, char **argv)
{
// indexing
Xapian::WritableDatabase db_w(database_dir, Xapian::DB_CREATE_OR_OVERWRITE);
MyText text_to_index;
text_to_index.set_string();
Xapian::TermGenerator indexer;
Xapian::Stem stemmer("english");
indexer.set_stemmer(stemmer);
Xapian::Document doc;
indexer.set_document(doc);
indexer.set_stemming_strategy(Xapian::TermGenerator::STEM_ALL_Z);
indexer.index_text(text_to_index.text_str);
db_w.add_document(doc);
db_w.commit();
db_w.close();
//searching
Xapian::Database db(database_dir);
Xapian::Enquire enquire(db);
Xapian::QueryParser qp;
qp.set_stemmer(stemmer);
qp.set_database(db);
qp.set_default_op(Xapian::Query::OP_NEAR);
qp.set_stemming_strategy(Xapian::QueryParser::STEM_ALL_Z);
std::cout << "\n###################################################\n";
std::cout << "query string: " << query_string << "\n";
std::cout << "\n###################################################\n";
Xapian::Query query = qp.parse_query(query_string);
std::cout << "\nParsed query is: " << query.get_description() << "\n\n\n";
// Find the top 10 results for the query.
enquire.set_query(query);
Xapian::MSet matches = enquire.get_mset(0, 10);
// Display the results.
std::cout << matches.get_matches_estimated() << " results found.\n";
Xapian::Snipper snippet_generator;
snippet_generator.set_stemmer(stemmer);
snippet_generator.set_mset(matches);
std::string snippet=snippet_generator.generate_snippet(text_to_index.text_str);
std::cout << "\n###################################################\n";
std::cout << "snippet:\n" << snippet << "\n";
std::cout << "\n###################################################\n";
//cout << "Matches 1-" << matches.size() << ":\n" << endl;
//for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) {
// cout << i.get_rank() + 1 << ": " << i.get_weight() << " docid=" << *i
// << " [" << i.get_document().get_data() << "]\n\n";
}
// Loads the contents of the file "text.txt" (relative path) into text_str.
//
// text_str is reset first. Each line read from the file is then appended,
// preceded by a single space, so line breaks become spaces and a non-empty
// result starts with a space.
//
// If the file cannot be opened, a message is written to standard output and
// the process exits with status 1.
void MyText::set_string()
{
    text_str.clear();

    std::ifstream myfile("text.txt");
    if (!myfile.is_open()) {
        std::cout << "Unable to open file text.txt";
        std::exit(1);
    }

    std::string line;
    while (std::getline(myfile, line)) {
        // Append in place: building text_str + " " + line would copy the
        // whole accumulated text again for every line read.
        text_str += ' ';
        text_str += line;
    }
    // myfile is closed by its destructor when it goes out of scope.
}
More information about the Xapian-discuss
mailing list