How to Search GMail from the Comfort of Your Command-Line

Posted by Prolific Programmer Fri, 04 Apr 2008 14:16:00 GMT

The command-line Gmail search is working. The next step is to speed it up: it still takes almost a minute to search 317 messages. The code is pasted after the jump, as with the last post.

package com.prolificprogrammer.lucenegmail;
import java.io.File;
import java.util.logging.Logger;
import java.util.logging.Level;

import javax.mail.Folder;
import javax.mail.Message;
import javax.mail.Session;
import javax.mail.Store;
import javax.mail.internet.InternetAddress;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

public class SearchGMail {
    private static Logger logger = Logger.getLogger(new SearchGMail().getClass().getCanonicalName());
    public static void main (String[] args) throws Exception {
	//logger.setLevel(java.util.logging.Level.FINE);
	try {
	    File path = new File(System.getProperty("java.io.tmpdir")+File.separator+"gmail.index");
	    path.mkdir();
	    path.deleteOnExit();
	    long starttime = System.currentTimeMillis();
	    IndexWriter index = new IndexWriter(path.getAbsolutePath(), new StandardAnalyzer(), true);
	    Session session = Session.getDefaultInstance(System.getProperties(), null);
	    Store store = session.getStore("pop3s");
	    store.connect("pop.gmail.com", args[0], args[1]);
	    logger.fine("Connected!");
	    Folder folder = store.getDefaultFolder();
	    folder = folder.getFolder("INBOX");
	    folder.open(Folder.READ_ONLY);
	    logger.fine("Opened INBOX");
	    Message[] messages = folder.getMessages();
	    int x;
	    for (x = 0; x != messages.length; x++) {
		try {
		    Document document = new Document();
		    String allField = ((InternetAddress)messages[x].getFrom()[0]).getAddress()+"\n"+messages[x].getSubject();
		    document.add(new Field("all", allField, Field.Store.YES, Field.Index.TOKENIZED));
		    Field messageNumberField = new Field("messageNumber", new Integer(x).toString(), Field.Store.YES, Field.Index.NO);
		    messageNumberField.setBoost((float)0.0);
		    document.add(messageNumberField);
		    index.addDocument(document);
		    logger.fine("Message "+x+" added.");
		} catch (OutOfMemoryError e) {
		    index.optimize();
		    continue;
		}
	    }
	    index.optimize();
	    index.close();
	    
	    logger.info("Index Constructed -- now searching");
	    
	    IndexSearcher searcher = new IndexSearcher(path.getAbsolutePath());
	    Analyzer analyzer = new StandardAnalyzer();
	    String query = args[2];
	    QueryParser queryParser = new QueryParser("all", analyzer);
	    Query parsedQuery = queryParser.parse(query);
	    Hits hits = searcher.search(parsedQuery);
	    for (int i = 0; i!= hits.length();i++) {
		Document doc = hits.doc(i);
		System.out.println("Message "+doc.getField("messageNumber").stringValue()+" matches "+query+" with a score of "+hits.score(i));
	    }
	    searcher.close();
	    long endtime = System.currentTimeMillis();
	    logger.severe("program took "+new Long(endtime-starttime).toString()+" miliseconds to search "+new Integer(x).toString()+" messages, which occupy "+new Long(path.length()).toString()+" bytes.");
	    java.awt.Toolkit.getDefaultToolkit().beep();
	} catch (ArrayIndexOutOfBoundsException e) {
	    logger.severe("Usage: "+new SearchGMail().getClass().getName()+" [google login] [password] [query]\nAll required");
	    System.exit(-1);
	}
    }
}
Comments

Leave a comment

Comments