[Xapian-discuss] Get term from document by position

john.alveris at Safe-mail.net john.alveris at Safe-mail.net
Sun Jul 26 19:39:16 BST 2015


mple (see attachment).
> 
> Attachments get stripped out by the mailing list, so I’ve made a private gist of the two files here: <https://gist.github.com/jaylett/ce8455b37e2b84422346>.
> 
> Actually, when I run it I get 0 matches, which would explain why you’re just getting the start of the document. However if I adjust things (match the stemming strategy for TermGenerator to that for QueryParser), it still gives me the opening rather than a useful snippet.

Sorry, my mistake. The modified test.cpp file should be this (I just added
    indexer.set_stemming_strategy(Xapian::TermGenerator::STEM_ALL_Z), line 34):

============= Beginning of the modified test.cpp file =========


#include <xapian.h>

#include <iostream>
#include <string>

#include <cstdlib> // For exit().
#include <cstring>
#include <fstream>

// Holds the text that main() indexes and later builds a snippet from.
struct MyText
{
    // The loaded text; assigned by set_string().
    std::string text_str;

    // Populates text_str (defined out of line further down in this file).
    void set_string();
};


// Directory of the Xapian database that main() creates (or overwrites) and then searches.
std::string database_dir{"db_dir"};
// Query text that main() hands to the query parser.
std::string query_string{"extracellular microbe"};
int
main(int argc, char **argv)
{
  
// indexing
    Xapian::WritableDatabase db_w(database_dir, Xapian::DB_CREATE_OR_OVERWRITE);
    MyText text_to_index;
    text_to_index.set_string();
    Xapian::TermGenerator indexer;
    Xapian::Stem stemmer("english");
    indexer.set_stemmer(stemmer);
    Xapian::Document doc;
    indexer.set_document(doc);
    indexer.set_stemming_strategy(Xapian::TermGenerator::STEM_ALL_Z);
    indexer.index_text(text_to_index.text_str);
    db_w.add_document(doc);
    db_w.commit();
    db_w.close();
    
    
    
//searching
Xapian::Database db(database_dir);
Xapian::Enquire enquire(db);

    Xapian::QueryParser qp;
    qp.set_stemmer(stemmer);
    qp.set_database(db);
    qp.set_default_op(Xapian::Query::OP_NEAR);
    qp.set_stemming_strategy(Xapian::QueryParser::STEM_ALL_Z);
    std::cout << "\n###################################################\n";
    std::cout << "query string: " << query_string << "\n";
    std::cout << "\n###################################################\n";
    Xapian::Query query = qp.parse_query(query_string);
    std::cout << "\nParsed query is: " << query.get_description() << "\n\n\n";

    // Find the top 10 results for the query.
    enquire.set_query(query);
    Xapian::MSet matches = enquire.get_mset(0, 10);

    // Display the results.
    std::cout << matches.get_matches_estimated() << " results found.\n";
    
    
    
    Xapian::Snipper snippet_generator;
    snippet_generator.set_stemmer(stemmer);
    snippet_generator.set_mset(matches);
    std::string snippet=snippet_generator.generate_snippet(text_to_index.text_str);
    std::cout << "\n###################################################\n";
    std::cout << "snippet:\n" << snippet << "\n";
    std::cout << "\n###################################################\n";
    //cout << "Matches 1-" << matches.size() << ":\n" << endl;

    //for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) {
    //    cout << i.get_rank() + 1 << ": " << i.get_weight() << " docid=" << *i
    //         << " [" << i.get_document().get_data() << "]\n\n";
}


// Loads the contents of the file "text.txt" into text_str.
//
// The file is read line by line; every line is appended to text_str preceded
// by a single space, so line breaks become spaces and a non-empty result
// starts with a space. Any previous contents of text_str are discarded.
//
// If the file cannot be opened, a message is printed to standard output and
// the program exits with status 1.
void MyText::set_string()
{
    std::ifstream myfile("text.txt");
    if (!myfile.is_open()) {
        std::cout << "Unable to open file text.txt";
        std::exit(1);
    }

    text_str.clear();
    std::string line;
    while (std::getline(myfile, line)) {
        // Append in place: `text_str = text_str + " " + line` would copy the
        // whole accumulated text on every iteration.
        text_str += ' ';
        text_str += line;
    }
    // myfile is closed by its destructor when it goes out of scope.
}



More information about the Xapian-discuss mailing list