[Xapian-discuss] Get term list from Query in python
Olly Betts
olly at survex.com
Thu Mar 16 02:58:22 GMT 2006
On Mon, Mar 13, 2006 at 04:38:21PM +0000, Olly Betts wrote:
> On Sat, Mar 11, 2006 at 12:56:24AM +0900, Sungsoo Kim wrote:
> > for term in query: print term
>
> > RuntimeError: InvalidOperationError: VectorTermList::get_termfreq() not supported
>
> The problem is that get_termfreq() isn't meaningful for terms in a
> query, but the python bindings ask for it anyway in this case.
OK, I've fixed this in SVN. The attached patch should do the trick, but
to use it you'll need to configure xapian-bindings with
--enable-maintainer-mode and have SWIG 1.3.28 installed.
Cheers,
Olly
-------------- next part --------------
Index: extra.i
===================================================================
--- extra.i (revision 6663)
+++ extra.i (working copy)
@@ -2,7 +2,7 @@
/* python/extra.i: Xapian scripting python interface additional code.
*
* Copyright (C) 2003,2004,2005 James Aylett
- * Copyright (C) 2005 Olly Betts
+ * Copyright (C) 2005,2006 Olly Betts
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -57,9 +57,14 @@
return r
class TermIter:
- def __init__(self, start, end):
+ HAS_NOTHING = 0
+ HAS_TERMFREQS = 1
+ HAS_POSITIONS = 2
+
+ def __init__(self, start, end, has = HAS_NOTHING):
self.iter = start
self.end = end
+ self.has = has
def __iter__(self):
return self
@@ -68,7 +73,14 @@
if self.iter==self.end:
raise StopIteration
else:
- r = [self.iter.get_term(), self.iter.get_wdf(), self.iter.get_termfreq(), PositionIter(self.iter.positionlist_begin(), self.iter.positionlist_end())]
+ termfreq = 0
+ if self.has & TermIter.HAS_TERMFREQS:
+ termfreq = self.iter.get_termfreq()
+ if self.has & TermIter.HAS_POSITIONS:
+ positer = PositionIter(self.iter.positionlist_begin(), self.iter.positionlist_end())
+ else:
+ positer = PositionIter()
+ r = [self.iter.get_term(), self.iter.get_wdf(), termfreq, positer]
self.iter.next()
return r
@@ -89,7 +101,7 @@
return r
class PositionIter:
- def __init__(self, start, end):
+ def __init__(self, start = 0, end = 0):
self.iter = start
self.end = end
@@ -142,14 +154,14 @@
Query.__iter__ = query_gen_iter
def database_gen_allterms_iter(self):
- return TermIter(self.allterms_begin(), self.allterms_end())
+ return TermIter(self.allterms_begin(), self.allterms_end(), TermIter.HAS_TERMFREQS)
Database.__iter__ = database_gen_allterms_iter
def database_gen_postlist_iter(self, tname):
return PostingIter(self.postlist_begin(tname), self.postlist_end(tname))
def database_gen_termlist_iter(self, docid):
- return TermIter(self.termlist_begin(docid), self.termlist_end(docid))
+ return TermIter(self.termlist_begin(docid), self.termlist_end(docid), TermIter.HAS_TERMFREQS)
def database_gen_positionlist_iter(self, docid, tname):
return PositionIter(self.positionlist_begin(docid, tname), self.positionlist_end(docid, tname))
@@ -159,7 +171,7 @@
Database.positionlist = database_gen_positionlist_iter
def document_gen_termlist_iter(self):
- return TermIter(self.termlist_begin(), self.termlist_end())
+ return TermIter(self.termlist_begin(), self.termlist_end(), TermIter.HAS_POSITIONS)
def document_gen_values_iter(self):
return ValueIter(self.values_begin(), self.values_end())
More information about the Xapian-discuss mailing list