How to Search Gmail from the comfort of your Keyboard 2
The Java code below leverages Lucene 2.3.1 and JavaMail to create a command-line search of your Gmail inbox. It's actually quite slow, so I'd like to speed it up over time, but it does give updates as it runs, perhaps too many. Any and all suggestions are appreciated.
package com.prolificprogrammer.lucenegmail;
import java.io.File;
import javax.mail.Address;
import javax.mail.FetchProfile;
import javax.mail.Folder;
import javax.mail.Message;
import javax.mail.Session;
import javax.mail.Store;
import javax.mail.internet.InternetAddress;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
public class SearchGMail {
public static void main (String[] args) throws Exception {
File path = new File(System.getProperty("java.io.tmpdir")+File.separator+"gmail.index");
path.mkdir();
path.deleteOnExit();
long starttime = System.currentTimeMillis();
IndexWriter index = new IndexWriter(path.getAbsolutePath(), new StandardAnalyzer(), true);
Session session = Session.getDefaultInstance(System.getProperties(), null);
Store store = session.getStore("imaps");
store.connect("imap.gmail.com", args[0], args[1]);
System.err.println("Connected!");
Folder folder = store.getDefaultFolder();
folder = folder.getFolder("INBOX");
folder.open(Folder.READ_ONLY);
System.err.println("Opened INBOX");
Message[] messages = folder.getMessages();
System.err.println("Messages retrieved!");
int x;
for (x = 0; x != messages.length; x++) {
Document document = new Document();
String allField = ((InternetAddress)messages[x].getFrom()[0]).getAddress()+"\n"+messages[x].getSubject();
document.add(new Field("all", allField, Field.Store.YES, Field.Index.TOKENIZED));
document.add(new Field("messageNumber", new Integer(x).toString(), Field.Store.YES, Field.Index.NO));
index.addDocument(document);
System.err.println("Message "+x+" added.");
}
index.optimize();
index.close();
System.err.println("Ok, index constructed with "+x+" messages in "+path.getAbsolutePath()+", now searching it");
IndexSearcher searcher = new IndexSearcher(path.getAbsolutePath());
Analyzer analyzer = new StandardAnalyzer();
String query = args[3];
QueryParser queryParser = new QueryParser("all", analyzer);
Query parsedQuery = queryParser.parse(query);
Hits hits = searcher.search(parsedQuery);
for (int i = 0; i!= hits.length();i++) {
Document doc = hits.doc(i);
System.out.println(doc.getField("messageNumber"));
}
searcher.close();
long endtime = System.currentTimeMillis();
System.err.println("program took "+endtime-starttime+" miliseconds to search "+x+" messages, which occupy "+path.length()+" bytes.");
java.awt.Toolkit.getDefaultToolkit().beep();
}
}
Comments
-
Preliminary testing shows that Gmail has a weird notion of total messages, and that the code requires more heap space.
-
I like the colours
