[Xapian-discuss] constructing phrase queries

James Aylett james-xapian at tartarus.org
Thu Sep 30 17:52:50 BST 2004


On Thu, Sep 30, 2004 at 01:10:32PM +0100, Olly Betts wrote:

> terms = [ 'to', 'be', 'or', 'not', 'to', 'be' ]
> query = xapian.Query_from_list(xapian.Query.OP_PHRASE, terms)

Yeah, that's fairly easy. What I /want/ to be able to do is to
overload the Query() constructor as we do for every other class. This
seems impossible, however, because _xapian.new_Query_from_list() is
synthetic, i.e. it doesn't exist in Xapian itself, but only in the
wrapper. The overloading uses SWIG magic, which doesn't seem to see
the synthetic signature. (And if I just rename the synthetic
constructor it gets /really/ strange ...)

I've got the above working, at the cost of losing all remaining
constructor overloading on the xapian.Query class. I'll try to get it
working with some overloading again at some point, but for the moment
a patch is attached to get this behaviour.

I've also included a bonus patch which gives some more Python-style
iterators, although these aren't fully tested and may not work. Don't
apply unless you want to play, in other words :-)

J

-- 
/--------------------------------------------------------------------------\
  James Aylett                                                  xapian.org
  james at tartarus.org                               uncertaintydivision.org
-------------- next part --------------
Index: xapian.i
===================================================================
RCS file: /usr/data/cvs/xapian/xapian-bindings/xapian.i,v
retrieving revision 1.15
diff -p -u -r1.15 xapian.i
--- xapian.i	29 Sep 2004 18:12:20 -0000	1.15
+++ xapian.i	30 Sep 2004 16:45:43 -0000
@@ -560,16 +560,19 @@ class Query {
 	%name(Query_from_term_pair) Query(Query::op op_, const std::string & left, const std::string & right);
 	%name(Query_empty) Query();
 #else
-	Query(const Query& copyme);
+	%name(Query_from_query_pair) Query(Query::op op_, const Query & left, const Query & right);
+	%name(Query_from_term_pair) Query(Query::op op_, const std::string & left, const std::string & right);
+	%name(Query_empty) Query();
+	/*	Query(const Query& copyme);
 	Query(Query::op op_, const Query & left, const Query & right);
-	Query(Query::op op_, const std::string & left, const std::string & right);
-	%extend {
-	    /** Constructs a query from a set of queries merged with the specified operator */
+	Query(Query::op op_, const std::string & left, const std::string & right);*/
+        %name(Query_from_list) %extend {
+           /** Constructs a query from a set of terms merged with the specified operator */
 	    Query(Query::op op,
-		  const vector<Query *> *subqs,
+		  const std::vector<std::string> *subqs,
 		  termpos window = 0) {
 		if ((subqs->size() == 2) && (window == 0)) {
-		    return new Xapian::Query(op, *(*subqs)[0], *(*subqs)[1]);
+		    return new Xapian::Query(op, (*subqs)[0], (*subqs)[1]);
 		} else {
 		    Xapian::Query * query=new Xapian::Query(op, subqs->begin(),subqs->end());
 		    query->set_window(window);
@@ -579,7 +582,7 @@ class Query {
 	}
 
 	/** Constructs a new empty query object */
-	Query();
+	//Query();
 #endif
 
 	~Query();
Index: python/util.i
===================================================================
RCS file: /usr/data/cvs/xapian/xapian-bindings/python/util.i,v
retrieving revision 1.4
diff -p -u -r1.4 util.i
--- python/util.i	8 Sep 2004 03:08:17 -0000	1.4
+++ python/util.i	30 Sep 2004 15:00:51 -0000
@@ -137,25 +137,33 @@
     $1 = 0;
 }
 */
-%typemap(python, in) const vector<string> &(vector<string> v){
+/* Free the heap-allocated vector built by the matching "in" typemap below. */
+%typemap(python, freearg) const std::vector<std::string>* {
+    delete $1;
+}
+
+/* Convert a Python list of strings into a newly allocated vector<string>. */
+%typemap(python, in) const std::vector<std::string>* {
     if (!PyList_Check($input)) {
         PyErr_SetString(PyExc_TypeError, "expected list");
         return NULL;
     }
+    vector<string> *v = new vector<string>();
     int numitems = PyList_Size($input);
     for (int i=0; i<numitems; ++i) {
         PyObject *obj = PyList_GetItem($input, i);
 	if (PyString_Check(obj)) {
 	    int len = PyString_Size(obj);
 	    char *err = PyString_AsString(obj);
-	    v.push_back(string(err, len));
+	    v->push_back(string(err, len));
 	} else {
 	    PyErr_SetString(PyExc_TypeError,
 			    "expected list of strings");
+	    delete v; /* early return bypasses freearg, so release it here */
 	    return NULL;
 	}
     }
-    $1 = &v;
+    $1 = v;
 }
 
 %typedef PyObject *LangSpecificListType;
-------------- next part --------------
Index: python/extra.i
===================================================================
RCS file: /usr/data/cvs/xapian/xapian-bindings/python/extra.i,v
retrieving revision 1.1
diff -p -u -r1.1 extra.i
--- python/extra.i	1 Jan 2004 22:48:35 -0000	1.1
+++ python/extra.i	26 May 2004 22:39:36 -0000
@@ -2,7 +2,7 @@
 /* python/extra.i: Xapian scripting python interface additional code.
  *
  * ----START-LICENCE----
- * Copyright 2003 James Aylett
+ * Copyright 2003,2004 James Aylett
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -22,8 +22,9 @@
  */
 %}
 
-// Provide Python-style iterator access to the MSet
 %pythoncode %{
+
+# Python-style iterators to mirror the C++ ones
 class MSetIter:
     def __init__(self, start, end):
         self.iter = start
@@ -40,8 +41,131 @@ class MSetIter:
 	    self.iter.next()
 	    return r
 
+class ESetIter:
+    def __init__(self, start, end):
+        self.iter = start
+        self.end = end
+
+    def __iter__(self):
+	return self
+
+    def next(self):
+	if self.iter==self.end:
+	    raise StopIteration
+	else:
+	    r = [self.iter.get_termname(), self.iter.get_weight()]
+	    self.iter.next()
+	    return r
+
+class TermIter:  # Python-style iterator over the term iterator range start..end
+    def __init__(self, start, end):
+        self.iter = start
+        self.end = end
+
+    def __iter__(self):
+	return self
+
+    def next(self):  # returns [term, wdf, termfreq, PositionIter] and advances
+	if self.iter==self.end:
+	    raise StopIteration
+	else:
+	    r = [self.iter.get_term(), self.iter.get_wdf(), self.iter.get_termfreq(), PositionIter(self.iter.positionlist_begin(), self.iter.positionlist_end())]
+	    self.iter.next()
+	    return r
+
+class PostingIter:
+    def __init__(self, start, end):
+        self.iter = start
+        self.end = end
+
+    def __iter__(self):
+	return self
+
+    def next(self):
+	if self.iter==self.end:
+	    raise StopIteration
+	else:
+	    r = [self.iter.get_docid(), self.iter.get_doclength(), self.iter.get_wdf(), PositionIter(self.iter.positionlist_begin(), self.iter.positionlist_end())]
+	    self.iter.next()
+	    return r
+
+class PositionIter:
+    def __init__(self, start, end):
+        self.iter = start
+        self.end = end
+
+    def __iter__(self):
+	return self
+
+    def next(self):
+	if self.iter==self.end:
+	    raise StopIteration
+	else:
+	    r = self.iter.get_termpos()
+	    self.iter.next()
+	    return r
+
+class ValueIterator:
+    def __init__(self, start, end):
+        self.iter = start
+        self.end = end
+
+    def __iter__(self):
+	return self
+
+    def next(self):
+	if self.iter==self.end:
+	    raise StopIteration
+	else:
+	    r = [self.iter.get_valueno(), self.iter.get_value()]
+	    self.iter.next()
+	    return r
+
+# Bind the Python iterators into the shadow classes
 def mset_gen_iter(self):
     return MSetIter(self.begin(), self.end())
 
 MSet.__iter__ = mset_gen_iter
+
+def eset_gen_iter(self):
+    return ESetIter(self.begin(), self.end())
+
+ESet.__iter__ = eset_gen_iter
+
+def enquire_gen_iter(self, which):  # iterate matching terms for `which`; begin and end get the same argument
+    return TermIter(self.get_matching_terms_begin(which), self.get_matching_terms_end(which))
+
+Enquire.matching_terms = enquire_gen_iter
+
+def query_gen_iter(self):
+    return TermIter(self.get_terms_begin(), self.get_terms_end())
+
+Query.__iter__ = query_gen_iter
+
+def database_gen_allterms_iter(self):
+    return TermIter(self.allterms_begin(), self.allterms_end())
+
+Database.__iter__ = database_gen_allterms_iter
+
+def database_gen_postlist_iter(self, tname):
+    return PostingIter(self.postlist_begin(tname), self.postlist_end(tname))
+def database_gen_termlist_iter(self, docid):
+    return TermIter(self.termlist_begin(docid), self.termlist_end(docid))
+def database_gen_positionlist_iter(self, docid, tname):
+    return PositionIter(self.positionlist_begin(docid, tname), self.positionlist_end(docid, tname))
+
+Database.allterms = database_gen_allterms_iter
+Database.postlist = database_gen_postlist_iter
+Database.termlist = database_gen_termlist_iter
+Database.positionlist = database_gen_positionlist_iter
+
+def document_gen_termlist_iter(self, docid=None, tname=None):  # extra args are unused; defaulted so iter(doc) works
+    return TermIter(self.termlist_begin(), self.termlist_end())
+def document_gen_valuelist_iter(self, docid=None, tname=None):  # extra args are unused; values need the value wrapper
+    return ValueIterator(self.values_begin(), self.values_end())
+
+Document.__iter__ = document_gen_termlist_iter
+Document.terms = document_gen_termlist_iter
+Document.values = document_gen_valuelist_iter
+
 %}


More information about the Xapian-discuss mailing list