/* * To change this template, choose Tools | Templates * and open the template in the editor. */ package org.autoplot.csv; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.io.PushbackInputStream; import java.util.regex.Pattern; /** * I'd still like to refactor all the table-type sources to get the common codes. * These include:

 * <ul>
 * <li>html tables</li>
 * <li>xls, csv</li>
 * <li>dat</li>
 * </ul>

/**
 * Utilities for tabular text data: resolving a column specification to a
 * column index, parsing column-range strings, and detecting the field
 * delimiter from the first line of a stream.
 * @author jbf
 */
public class TableOps {

    /** Implicit column name, "field" followed by the zero-based column number. */
    private static final Pattern FIELD_N = Pattern.compile("field[0-9]+");

    /** Non-negative column number. */
    private static final Pattern DIGITS = Pattern.compile("[0-9]+");

    /** Negative column number, counted back from the end. */
    private static final Pattern NEGATIVE_DIGITS = Pattern.compile("-[0-9]+");

    /**
     * returns the index of the field.  Supports the name, or field0, or 0, etc.
     * Names are compared ignoring case, and a name match wins over the
     * numeric interpretations.
     * @param string the field for which we want to identify the index
     * @param fieldNames the field names for each column.  Null entries are
     *   tolerated and never match.
     * @return the field index, or -1 if the column doesn't exist.
     * @throws IllegalArgumentException if a numeric index (N or fieldN) is
     *   not less than the number of columns.
     */
    public static int getFieldIndex(String string, String[] fieldNames) {
        for (int i = 0; i < fieldNames.length; i++) {
            // receiver is the (non-null) query so that a null column name cannot throw.
            if (string.equalsIgnoreCase(fieldNames[i])) {
                return i;
            }
        }
        int icol = -1;
        if (FIELD_N.matcher(string).matches()) {
            icol = Integer.parseInt(string.substring(5)); // skip "field"
        } else if (DIGITS.matcher(string).matches()) {
            icol = Integer.parseInt(string);
        }
        if (icol >= fieldNames.length) {
            throw new IllegalArgumentException("bad column parameter: the record parser only expects "
                    + fieldNames.length + " columns");
        }
        return icol;
    }

    /**
     * returns the field index of the name, which can be:
     * <ul>
     * <li>a column name</li>
     * <li>an implicit column name, "field" plus the zero-based index ("field1" is index 1)</li>
     * <li>a column index (0 is the first column)</li>
     * <li>a negative column index (-1 is the last column)</li>
     * </ul>
     * Numeric forms are interpreted before column names and are not range
     * checked here.
     * @param name the column specification.
     * @param fieldNames the field names for each column.
     * @return the index of the field, or -1 if the column doesn't exist.
     */
    public static int columnIndex(String name, String[] fieldNames) {
        if (DIGITS.matcher(name).matches()) {
            return Integer.parseInt(name);
        }
        if (NEGATIVE_DIGITS.matcher(name).matches()) {
            return fieldNames.length + Integer.parseInt(name);
        }
        if (FIELD_N.matcher(name).matches()) {
            return Integer.parseInt(name.substring(5)); // skip "field"
        }
        return getFieldIndex(name, fieldNames);
    }

    /**
     * parse range strings like "3:6", "3:-5", and "Bx_gsm-Bz_gsm".
     * If the delimiter is colon, then the end is exclusive.  If it is "-",
     * then it is inclusive.  A double hyphen is a hyphen delimiter followed by
     * a negative index.  A missing start or end means the first column or the
     * end of the table.  For example, with the returned end always exclusive:
     * <ul>
     * <li>3:6 -&gt; [3,6]</li>
     * <li>3-5 -&gt; [3,6]</li>
     * <li>3--1 -&gt; [3,fieldNames.length]</li>
     * <li>--2 -&gt; [0,fieldNames.length-1]</li>
     * </ul>
     * A string containing neither ":" nor "-" yields the full range
     * [0,fieldNames.length].
     * @param o the range string or field names, etc.
     * @param fieldNames the field names for each column.
     * @return the two-element range, where first index is inclusive, second is exclusive.
     * @throws java.lang.NumberFormatException
     */
    public static int[] parseRangeStr(String o, String[] fieldNames) throws NumberFormatException {
        int first = 0;
        int last = fieldNames.length;
        if (o.contains(":")) {
            String[] ss = o.split(":", -2);
            if (ss[0].length() > 0) {
                first = columnIndex(ss[0], fieldNames);
            }
            if (ss[1].length() > 0) {
                last = columnIndex(ss[1], fieldNames);
            }
        } else if (o.contains("--")) {
            // search from the start so an open start ("--2") is found at 0; searching
            // from 1 missed it and the whole string was looked up as a column name.
            int isplit = o.indexOf("--");
            if (isplit > 0) {
                first = columnIndex(o.substring(0, isplit), fieldNames);
            }
            if (isplit < o.length() - 2) {
                // isplit+1 keeps the second hyphen, the sign of the negative index.
                last = 1 + columnIndex(o.substring(isplit + 1), fieldNames);
            }
        } else if (o.contains("-")) {
            String[] ss = o.split("-", -2);
            if (ss[0].length() > 0) {
                first = columnIndex(ss[0], fieldNames);
            }
            if (ss[1].length() > 0) {
                last = 1 + columnIndex(ss[1], fieldNames);
            }
        }
        return new int[]{first, last};
    }

    /**
     * get the delimiter, either a comma or semicolon, by looking at the first
     * line of the stream.  Semicolon is returned only when the first line
     * contains more semicolons than commas.  Only the bytes of the first line
     * (through its line terminator) are read, and exactly those bytes are
     * pushed back, so the stream is left at the zeroth byte.
     * @param thein the PushbackInputStream, which will be at the zeroth byte to start and the zeroth byte when this is done.
     *   Its pushback buffer must be able to hold the first line.
     * @return the delimiter, which is a comma for an empty stream.
     * @throws IOException if reading fails or the first line cannot be pushed back.
     */
    public static char getDelim(PushbackInputStream thein) throws IOException {
        // Read bytes directly: a buffered reader would consume far more than the
        // first line from the stream, and that read-ahead could not be pushed back.
        java.io.ByteArrayOutputStream firstLine = new java.io.ByteArrayOutputStream();
        int commas = 0;
        int semicolons = 0;
        int b;
        while ((b = thein.read()) != -1) {
            firstLine.write(b);
            if (b == '\n' || b == '\r') {
                break;
            }
            if (b == ',') {
                commas++;
            } else if (b == ';') {
                semicolons++;
            }
        }
        if (firstLine.size() > 0) {
            thein.unread(firstLine.toByteArray());
        }
        return semicolons > commas ? ';' : ',';
    }
}