/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.das2.util;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Static utility methods for working with strings.
* @author jbf
*/
public class StringTools {

    /** Character substituted for protected text so that the delimiter cannot match there. */
    private static final char HIDE = '_';

    /**
     * Splits the string on a delimiter, but never inside a region guarded by the protect
     * character. For example, a comma-separated line with quoted fields can be split so that
     * a comma inside a quoted field does not start a new field:
     *
     * <pre>
     *   2009,0,2,"Hot riser,spark",99
     * </pre>
     *
     * is split into five fields by {@code guardedSplit(s, ",", '"')}. The returned fields are
     * substrings of the original string, so the protect characters and any backslashes are
     * kept in the output. Like {@code s.split(delim, -2)}, trailing empty fields are kept.
     *
     * @param str the string to split.
     * @param delim the delimiter, a regular expression, to split on.
     * @param protect character that blocks off the delimiter, such as a quote. A protect
     *    character preceded by a backslash does not open or close a protected region.
     * @return the fields, in order; always at least one element.
     * @throws IllegalArgumentException if the delimiter matches the protect character or
     *    matches "_", which is used internally to mask the protected regions.
     */
    public static String[] guardedSplit( String str, String delim, char protect ) {
        // Compile the delimiter once; it is used for both the sanity checks and the split.
        Pattern splitter = Pattern.compile(delim);
        if ( splitter.matcher( String.valueOf(protect) ).matches() ) {
            throw new IllegalArgumentException("the delimiter cannot match the protect character");
        }
        if ( splitter.matcher( String.valueOf(HIDE) ).matches() ) {
            throw new IllegalArgumentException("the delimiter cannot match _");
        }

        // The masked copy has exactly the same length as str, so the offsets found in it
        // can be applied directly to the original string.
        String masked = maskProtected( str, protect );

        ArrayList<String> result = new ArrayList<String>();
        Matcher m = splitter.matcher(masked);
        int fieldStart = 0;
        while ( m.find() ) {
            result.add( str.substring( fieldStart, m.start() ) );
            fieldStart = m.end();
        }
        result.add( str.substring(fieldStart) ); // the remainder after the last delimiter
        return result.toArray( new String[result.size()] );
    }

    /**
     * Returns a copy of str, of the same length, in which every protected region is
     * overwritten with the HIDE character. A region runs from an unescaped protect
     * character through the next unescaped protect character, both included; an unclosed
     * region runs to the end of the string. A backslash makes the following character
     * literal, so it neither opens nor closes a region. Outside a region nothing is masked,
     * so a backslash there does not guard a delimiter.
     */
    private static String maskProtected( String str, char protect ) {
        // Work on chars rather than encoded bytes so that indices stay aligned with str
        // even when it contains non-ASCII text.
        char[] chars = str.toCharArray();
        boolean inside = false;
        boolean escaped = false; // true when the previous character was an unescaped backslash
        for ( int i = 0; i < chars.length; i++ ) {
            char c = chars[i];
            boolean closing = false;
            if ( escaped ) {
                escaped = false;
            } else if ( c == protect ) {
                closing = inside;
                inside = !inside;
            } else if ( c == '\\' ) {
                escaped = true;
            }
            if ( inside || closing ) {
                chars[i] = HIDE;
            }
        }
        return new String(chars);
    }

    /**
     * Small demonstration: prints the number of fields found when splitting a line
     * containing a quoted field with an escaped quote.
     * @param args not used.
     */
    public static void main( String[] args ) {
        String s= "a b c \"d \\\" e\" f";
        System.err.println( guardedSplit( s, " ", '"' ).length );
    }
}