package org.autoplot.html;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.swing.text.html.parser.ParserDelegator;
import org.das2.util.LoggerManager;
import org.das2.util.monitor.NullProgressMonitor;
import org.das2.util.monitor.ProgressMonitor;
import org.das2.qds.DataSetOps;
import org.das2.qds.QDataSet;
import org.autoplot.datasource.AbstractDataSource;
import org.autoplot.datasource.capability.Streaming;
/**
* Data source for extracting data from HTML tables. This has been used
* for looking at real estate sales and weather history.
* @author jbf
*/
public class HtmlTableDataSource extends AbstractDataSource {
private static final Logger logger= LoggerManager.getLogger("apdss.html");
/**
* the parameter name (not label) to plot
*/
public static final String PARAM_COLUMN= "column";
public static final String PARAM_TABLE= "table";
public static final String PARAM_UNITS= "units";
public HtmlTableDataSource(URI uri) {
super(uri);
addCapability( Streaming.class, new AsciiTableStreamingSource() );
}
/**
* read the table from the file.
* @param mon
* @return
* @throws IOException
*/
public QDataSet getTable( ProgressMonitor mon ) throws IOException {
File f= getHtmlFile(resourceURI.toURL(),mon);
try (BufferedReader reader = new BufferedReader( new FileReader(f))) {
HtmlParserCallback callback = new HtmlParserCallback( );
String units= getParam("units",null);
if ( units!=null ) {
callback.setUnits(URLDecoder.decode(units,"UTF-8"));
}
String stable= (String)getParams().get( PARAM_TABLE );
if ( stable!=null ) callback.setTable( stable );
new ParserDelegator().parse( reader, callback, true );
QDataSet ds= callback.getDataSet();
return ds;
}
}
@Override
public QDataSet getDataSet( ProgressMonitor mon ) throws IOException {
QDataSet ds = getTable( mon );
String column= (String) getParams().get(PARAM_COLUMN);
if ( column==null ) {
return ds;
} else {
try {
int icol= Integer.parseInt(column);
return DataSetOps.unbundle( ds, icol );
} catch ( NumberFormatException ex ) {
return DataSetOps.unbundle( ds, column );
}
}
}
/**
* return a list of the tables, with column and human readable description after.
* @return a list of the tables, with column and human readable description after.
* @throws java.io.IOException
*/
public List getTables() throws java.io.IOException {
File f= getHtmlFile( resourceURI.toURL(), new NullProgressMonitor() );
BufferedReader reader = new BufferedReader( new FileReader(f));
HtmlParserCallback callback = new HtmlParserCallback( );
String stable= (String)getParams().get( PARAM_TABLE );
if ( stable!=null ) callback.setTable( stable );
new ParserDelegator().parse( reader, callback, true );
List tables= new ArrayList(callback.getTables());
return tables;
}
private class AsciiTableStreamingSource implements Streaming {
public AsciiTableStreamingSource() {
}
@Override
public Iterator streamDataSet(ProgressMonitor mon) throws Exception {
AsciiTableStreamer result= new AsciiTableStreamer();
final File f= getHtmlFile( resourceURI.toURL(),mon );
final BufferedReader reader = new BufferedReader( new FileReader(f) );
final HtmlParserStreamer callback = new HtmlParserStreamer( );
callback.ascii= result;
String units= getParam("units",null);
if ( units!=null ) {
callback.setUnits(URLDecoder.decode(units,"UTF-8"));
}
String stable= (String)getParams().get( PARAM_TABLE );
if ( stable!=null ) callback.setTable( stable );
Runnable run= new Runnable() {
@Override
public void run() {
try {
new ParserDelegator().parse( reader, callback, true );
logger.log(Level.FINE, "Done parsing {0}", f);
} catch ( IOException ex ) {
} finally {
try {
reader.close();
} catch ( IOException ex ) {
logger.log( Level.WARNING, ex.getMessage(), ex );
}
}
}
};
new Thread( run, "HtmlTableDataStreamer" ).start();
//new ParserDelegator().parse( reader, callback, true );
return result;
}
}
}