/*
* SPDFMetaDataScraper.java
*
* Created on February 2, 2007, 7:21 AM
*
* To change this template, choose Tools | Template Manager
* and open the template in the editor.
*/
package org.autoplot.dods;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;
import org.das2.util.LoggerManager;
/**
* Scrape the metadata from the <dods URL>.html form of the data.
* Get a new instance, call parse( <dods URL>.html ), then
* call getAttr(String varName) which returns a Map of the properties.
*
* Note the scraping is only necessary because Jeremy forgot about the
* .das and .dds extensions. .dds returns the stream syntax. .das returns
* the metadata.
*
* @author jbf
*/
public class MetaDataScraper {
private final static Logger logger= LoggerManager.getLogger("apdss.opendap");
HashMap varAttrs;
HashMap varAttrsData;
HashMap recDims;
class MyCallBack extends HTMLEditorKit.ParserCallback {
String varName;
String recDim;
Object nameKey=HTML.getAttributeKey("name");
public void handleText(char[] data, int pos) {
if ( varName!=null ) {
varAttrsData.put( varName, data );
} else {
String s= new String( data );
int i= s.indexOf("[RecDim =");
if ( i==-1 ) i= s.indexOf("[Dim0 ="); // sometimes doesn't start with RecDim (ENERGY_ELE)
if (i!= -1 ) {
recDim= s.substring(i); // the RecDim is the last bit in the the Text segment.
}
}
}
public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
super.handleStartTag(t, a, pos);
if ( t==HTML.Tag.TEXTAREA ) {
String nameAttr= (String)a.getAttribute(nameKey) ;
if ( nameAttr!=null && nameAttr.endsWith("_attr") ) {
varName= nameAttr.substring(0,nameAttr.length()-5);
if ( recDim!=null ) {
recDims.put( varName, recDim );
recDim= null;
}
} else {
varName=null;
}
} else {
varName=null;
}
}
public void handleEndTag(HTML.Tag t, int pos) {
super.handleEndTag(t, pos);
varName= null;
}
}
/**
* retrieve the URL, which should be a dods server form. The
* content is scraped, looking for textareas with the name
* varname_attr. The textarea content is assumed to
* be a newline delimited set of name value pairs, name: value.
* Value is of type Double or String.
*
* After parseURL is performed, getAttr is used to get Attributes.
* @param url
* @throws java.io.IOException
* @throws IllegalArgumentException when the url does not end in .html
*/
public void parseURL( URL url ) throws IOException {
if ( !url.toString().endsWith(".html" ) ) throw new IllegalArgumentException("must end in .html");
varAttrs= new HashMap();
varAttrsData= new HashMap();
recDims= new HashMap();
logger.log(Level.FINE, "parseURL opening {0}", url);
try ( InputStream in= url.openStream() ) {
new ParserDelegator().parse( new InputStreamReader(in), new MyCallBack(), true );
}
}
private Map parseData( char[] data ) {
HashMap result= new HashMap();
String s= new String(data);
String[] ss= s.split("\n");
for (String s1 : ss) {
int ic = s1.indexOf(":");
String name = s1.substring(0, ic);
String value = s1.substring(ic+1).trim();
if ( value.startsWith("\"") ) {
value= value.substring(1,value.length()-1);
result.put( name, value );
} else {
result.put(name, Double.parseDouble(value));
}
}
return result;
}
/**
* provides the attributes for this variable in a map. The keys are the String
* attribute name (e.g. UNITS) and the values are either type String or Double.
* @param varName the variable name
* @return the attributes
*/
public Map getAttr( String varName ) {
if ( varAttrs==null ) throw new IllegalArgumentException("need to parse URL first");
Map result= (Map) varAttrs.get(varName);
if ( result==null ) {
char[] data= (char[]) varAttrsData.get(varName);
if ( data==null ) throw new IllegalArgumentException("variable not found: "+varName );
result= parseData( data );
varAttrs.put( varName, result );
}
return result;
}
public int[] getRecDims( String varName ) {
String rds= (String) recDims.get(varName);
if ( rds==null ) throw new IllegalArgumentException("variable not found: "+varName );
String[] ss= rds.split("]");
int[] result= new int[ss.length];
for ( int i=0; i