/*
 * AsciiParser.java
 *
 * Created on May 25, 2007, 7:01 AM
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */
package org.virbo.dsutil;

import edu.uiowa.physics.pw.das.datum.Units;
import edu.uiowa.physics.pw.das.util.DasProgressMonitor;
import edu.uiowa.physics.pw.das.util.NullProgressMonitor;
import java.io.*;
import java.text.ParseException;
import java.util.regex.*;
import org.virbo.dataset.DataSet;

/**
 * Class for reading ASCII tables into a DataSet.  This parses a
 * file by looking at each line to see if it matches one of
 * two Patterns: one for properties and one for records.  If a line matches
 * the record pattern, its fields are pulled out, parsed and inserted into a
 * DataSetBuilder.  If a line matches the property pattern, then the builder
 * property is set.  Two property Patterns, NAME_COLON_VALUE_PATTERN and
 * NAME_EQUAL_VALUE_PATTERN, are provided for convenience.  The record pattern
 * is currently the configured number of numeric fields separated by whitespace
 * or commas.  Note the X tags are just the record numbers.
 *
 * Adapted to v3.0 DataSet model, Jeremy, May 2007.
 * @author  Jeremy
 */
public class AsciiParser {
    
    /** Pattern used by readStream to recognize property lines; when null, no property matching is attempted. */
    Pattern propertyPattern;
    /** Regex for comment lines.  NOTE(review): not referenced anywhere in the visible code. */
    String commentRegex;
    /** Field names supplied when the parser was constructed. */
    String[] fieldNames;
    /** Units for each field; the String[] constructor initializes every entry to Units.dimensionless. */
    Units[] units;
    
    /** One or more characters drawn from digits, '.', 'e', 'E', '+' and '-'.  This is a loose token test, not a strict number grammar. */
    final static String numberPart= "[\\d\\.eE\\+\\-]+";
    /** Regex used for one decimal field when building record patterns. */
    final static String decimalRegex= numberPart;
    /** Skip-lines setting, assigned by setSkipLines. */
    int skipLines;
    /** Record count limit, assigned by setRecordCountLimit. */
    int recordCountLimit;
    /** Number of fields, taken from the length of the field name array at construction. */
    int fieldCount;
    
    /** Matches "name : value"; group 1 is the name (reluctant match) and group 2 is the value. */
    public final static Pattern NAME_COLON_VALUE_PATTERN= Pattern.compile("\\s*(.+?)\\s*\\:\\s*(.+)\\s*");
    /** Matches "name = value"; group 1 is the name (reluctant match) and group 2 is the value. */
    public final static Pattern NAME_EQUAL_VALUE_PATTERN= Pattern.compile("\\s*(.+?)\\s*\\=\\s*(.+)\\s*");
    
    /** Property name constant.  NOTE(review): not referenced anywhere in the visible code. */
    public final static String PROPERTY_FIELD_NAMES= "fieldNames";
    
    /** Filler string of 1024 space characters, assigned in the static initializer. */
    private static final String SPACES;
    /** Filler string of 1024 dash characters, assigned in the static initializer. */
    private static final String DASHES;

    /** Name of the builder property under which readStream stores the collected file header text. */
    public static final String PROPERTY_FILE_HEADER= "file_header";
    
    static {
        // Build both 1024-character filler strings from a single pre-filled char array.
        char[] filler= new char[1024];

        java.util.Arrays.fill( filler, ' ' );
        SPACES= new String( filler );

        java.util.Arrays.fill( filler, '-' );
        DASHES= new String( filler );
    }


    /**
     * Creates a parser for the named fields.  Every field starts out with
     * Units.dimensionless, and the record parser is a RegexParser whose pattern
     * expects one decimal token per field, each followed by a single delimiter
     * character and optional whitespace (no delimiter after the last field).
     *
     * @param fieldNames the field names; the array length determines the field count.
     */
    private AsciiParser( String[] fieldNames ) {
        this.fieldNames= fieldNames;
        this.fieldCount= fieldNames.length;

        this.units= new Units[fieldCount];
        java.util.Arrays.fill( this.units, Units.dimensionless );

        final String field= "(" + decimalRegex + ")";
        final String delimitedField= field + "[\\s+,+]\\s*";

        StringBuffer pattern= new StringBuffer();
        pattern.append( "\\s*" );
        // all fields but the last are followed by a delimiter
        int remaining= fieldCount - 1;
        while ( remaining > 0 ) {
            pattern.append( delimitedField );
            remaining--;
        }
        pattern.append( field ).append( "\\s*" );

        recordParser= new RegexParser( pattern.toString() );
    }
    
    /**
     * Returns placeholder field names for the file: "field0", "field1", ... with one
     * name per field reported by guessFieldCount.  Someday this will work like
     * guessFieldCount to discover the table's real field names, if available.
     *
     * @param filename the file to scan.
     * @return the generated field names.
     */
    public static String[] guessFieldNames( String filename ) throws FileNotFoundException, IOException {
        String[] names= new String[ guessFieldCount(filename) ];
        for ( int index= names.length-1; index>=0; index-- ) {
            names[index]= "field" + index;
        }
        return names;
    }
    
    /**
     * Returns the field count that would result in the largest number of records parsed.  The
     * entire file is scanned, and for each line the number of decimal fields is counted.  At the end
     * of the scan, the field count with the highest record count is returned.  Only counts from
     * 1 to 9 can be identified; lines with more fields are not counted.  When two counts tie,
     * the smaller one is returned.
     *
     * @param filename the file to scan.
     * @return the most common field count, or 0 if no line looked like a record.
     * @throws FileNotFoundException if the file cannot be opened.
     * @throws IOException if reading the file fails.
     */
    public static int guessFieldCount( String filename ) throws FileNotFoundException, IOException {
        
        final int maxFieldCount=10;  // can only identify maxFieldCount - 1.
        
        int[] recCount= new int[maxFieldCount];
        
        // Group 1 is the first field.  Each further optional field adds two groups:
        // an outer one (delimiter plus value) and an inner one (the value alone), so
        // the field values sit in the odd-numbered groups 1,3,...,2*maxFieldCount-1.
        StringBuffer regexBuf= new StringBuffer();
        regexBuf.append("\\s*("+decimalRegex+")");
        for ( int i=1; i<maxFieldCount; i++ ) {
            regexBuf.append("([\\s+,+]\\s*("+decimalRegex+"))?");
        }
        regexBuf.append( "\\s*" );
        
        Pattern pat= Pattern.compile( regexBuf.toString() );
        
        BufferedReader reader= new LineNumberReader( new FileReader( filename ) );
        try {
            String line;
            while ( ( line=reader.readLine() ) != null ) {
                Matcher m= pat.matcher( line );
                if ( m.matches() ) {
                    // The first unmatched odd group tells how many fields the line has.
                    // The bound is inclusive so that the last group is inspected too;
                    // otherwise lines with maxFieldCount-1 fields are never counted.
                    for ( int j=1; j<=m.groupCount(); j+=2 ) {
                        if ( m.group(j)==null ) {
                            recCount[ (j-1) / 2 ] ++;
                            break;
                        }
                    }
                }
            }
        } finally {
            reader.close();
        }
        
        int max=0;
        int imax=0;
        for ( int j=1; j<maxFieldCount; j++ ) {
            if ( recCount[j] > max ) {
                imax=j;
                max= recCount[j];
            }
        }
        
        return imax;
    }
    
    /**
     * Reads a template line from the file (the line that follows the first skipLines
     * lines), splits it with the given delimiter, and derives fixed column offsets and
     * widths from where the pieces sit in that line.  The resulting FixedColumnsParser,
     * which parses every column with DOUBLE_PARSER, is installed as this parser's
     * record parser.
     *
     * If the first piece is blank the columns are treated as right-justified and each
     * column ends where its text ends; otherwise each column starts where its text starts.
     * NOTE(review): the number of columns is one less than the number of pieces returned
     * by split, so in the left-justified case the last piece gets no column of its own —
     * confirm this is intended.
     *
     * @param filename filename to read in.
     * @param delim regex to split the template line into the fixed columns.
     * @return the delegate parser that will split each line.  (TODO: Why would I want it?)
     * @throws java.io.EOFException if the file ends before the template line is reached.
     * @throws java.io.IOException if the file cannot be read.
     * @throws IllegalArgumentException if the template line yields no pieces when split.
     */
    public FixedColumnsParser setFixedColumnsParser( String filename, String delim ) throws IOException {
        
        String line;
        
        BufferedReader reader= new LineNumberReader( new FileReader( filename ) );
        try {
            line= reader.readLine();
            for ( int i=0; i<skipLines && line!=null; i++ ) line= reader.readLine();
        } finally {
            // close even when readLine fails, so the file handle is not leaked
            reader.close();
        }
        
        if ( line==null ) {
            throw new EOFException( "no template line found in " + filename
                    + " after skipping " + skipLines + " line(s)" );
        }
        
        String[] ss= line.split(delim);
        if ( ss.length==0 ) {
            // split drops trailing empty strings, so a line made only of delimiters yields nothing
            throw new IllegalArgumentException( "template line has no fields when split with \""
                    + delim + "\": " + line );
        }
        
        int[] columnOffsets= new int[ss.length];
        int[] columnWidths= new int[ss.length-1];
        FieldParser[] fieldParsers= new FieldParser[ss.length-1];
        
        int col=0;
        
        boolean rightJustified= false;
        if ( ss[0].trim().length()==0 ) {
            // leading blank piece: drop it by shifting the remaining pieces down one slot
            rightJustified= true;
            for ( int i=0; i<ss.length-1; i++ ) ss[i]= ss[i+1];
        }
        
        columnOffsets[0]= 0;
        
        if ( rightJustified ) {
            for ( int i=1; i<ss.length; i++ ) {
                col= line.indexOf( ss[i-1], columnOffsets[i-1] );
                columnOffsets[i]=  col+ss[i-1].length();
                columnWidths[i-1]=  columnOffsets[i] - columnOffsets[i-1];
            }
        } else {
            for ( int i=1; i<ss.length; i++ ) {
                col= line.indexOf(ss[i],col+ss[i-1].length()); // account for whitespace
                columnOffsets[i]= col;
                columnWidths[i-1]= columnOffsets[i] - columnOffsets[i-1];
            }
        }
        
        for ( int i=0; i<fieldParsers.length; i++ ) {
            fieldParsers[i]= DOUBLE_PARSER;
        }
        
        // the parser takes one offset per width, so the final boundary offset is dropped
        int[] co= new int[columnWidths.length];
        System.arraycopy( columnOffsets, 0, co, 0, co.length );
        
        FixedColumnsParser p= new FixedColumnsParser( co, columnWidths, fieldParsers );
        this.recordParser= p;
        
        return p;
    }
    
    /**
     * Creates a parser with the given number of fields, named "field0", "field1", and so on.
     *
     * @param fieldCount the number of fields.
     * @return the new parser.
     */
    public static AsciiParser newParser( int fieldCount ) {
        String[] names= new String[ fieldCount ];
        int index= 0;
        while ( index<names.length ) {
            names[index]= "field" + index;
            index++;
        }
        return new AsciiParser( names );
    }
    
    /**
     * Creates a parser for the named fields.
     *
     * @param fieldNames the name of each field; the array length is the field count.
     * @return the new parser.
     */
    public static AsciiParser newParser( String[] fieldNames ) {
        AsciiParser parser= new AsciiParser( fieldNames );
        return parser;
    }
    
    /**
     * Sets the skip-lines setting, which controls how many leading lines of the
     * input are passed over before anything is parsed.
     *
     * @param skipLines the new skip-lines value.
     */
    public void setSkipLines( int skipLines ) {
        this.skipLines = skipLines;
    }
    
    /**
     * Sets the record count limit, intended to cap the number of records read.
     *
     * @param recordCountLimit the new record count limit.
     */
    public void setRecordCountLimit( int recordCountLimit ) {
        this.recordCountLimit = recordCountLimit;
    }
    
    /**
     * Sets the Pattern used to recognize property lines.  Property values are
     * not parsed; they are handed to the builder as Strings.
     *
     * @param propertyPattern the pattern whose groups 1 and 2 are the property name and value.
     */
    public void setPropertyPattern( Pattern propertyPattern ) {
        this.propertyPattern = propertyPattern;
    }
    
    
    /**
     * Parse the stream using the current settings.  The first skipLines lines are
     * skipped (and collected, one per line, as the file header when keepFileHeader is
     * set).  Each remaining line is first tested against the property pattern, if one is
     * set, and otherwise offered to the record parser.  Lines that match neither are
     * ignored.  Reading stops at end of stream, when the monitor is cancelled, or once
     * recordCountLimit records have been read (a limit of zero or less means no limit).
     *
     * The collected header text is always stored in the builder under
     * PROPERTY_FILE_HEADER; it is empty when keepFileHeader is false.
     *
     * @param in the stream to read; it is not closed by this method.
     * @param mon monitor that receives progress, measured in characters read.
     * @return the dataset produced by the builder.
     * @throws IOException if reading the stream fails.
     * @throws IllegalStateException if no record parser has been configured.
     */
    private DataSet readStream( InputStream in, DasProgressMonitor mon ) throws IOException {
        if ( recordParser==null ) {
            throw new IllegalStateException( "no record parser has been configured" );
        }
        
        BufferedReader reader= new BufferedReader( new InputStreamReader(in ) );
        String line;
        int iline=0;
        int irec=0;
        
        mon.started();
        
        DataSetBuilder builder= new DataSetBuilder( 2, 100, recordParser.fieldCount(), 1 );
        
        long bytesRead= 0;
        
        StringBuffer headerBuffer= new StringBuffer();
        
        Matcher m;
        while ( (line=reader.readLine() )!=null && !mon.isCancelled() ) {
            bytesRead+= line.length()+1; // +1 is for end-of-line
            iline++;
            mon.setTaskProgress( bytesRead );
            try {
                // iline is 1-based here, so "<=" skips exactly skipLines lines,
                // consistent with setFixedColumnsParser.
                if ( iline<=skipLines ) {
                    // keep the line breaks so the header lines do not run together
                    if ( keepFileHeader ) headerBuffer.append(line).append('\n');
                    continue;
                }
                
                if ( propertyPattern!=null && ( m=propertyPattern.matcher(line) ).matches() ) {
                    builder.putProperty( m.group(1).trim(), m.group(2).trim() );
                    
                } else if ( recordParser.tryParseRecord(line,irec,builder) ) {
                    irec++;
                    builder.nextRecord();
                    // honor the record count limit; non-positive means unlimited
                    if ( recordCountLimit>0 && irec>=recordCountLimit ) break;
                }
            } catch ( NumberFormatException e ) {
                e.printStackTrace();
            }
        }
        mon.finished();
        
        builder.putProperty( PROPERTY_FILE_HEADER, headerBuffer.toString() );
        
        return builder.getDataSet();
    }
    
    /**
     * Strategy for recognizing one line of the file as a record and loading its
     * field values into a DataSetBuilder.
     */
    public static interface RecordParser {
        /**
         * returns true if the line appears to be a record
         * @param line the line of text to examine.
         * @param irec the index of the record the values would be stored under.
         * @param builder the builder that receives the parsed values.
         */
        boolean tryParseRecord( String line, int irec, DataSetBuilder builder );
        /**
         * returns the number of fields this parser produces for each record.
         */
        int fieldCount();
    }
    
    /**
     * Strategy for converting the text of a single field into a double.
     */
    public static interface FieldParser {
        /**
         * @param field the text of the field.
         * @param columnIndex the index of the column the field came from.
         * @return the parsed value.
         * @throws ParseException if the text cannot be parsed.
         */
        double parseField( String field, int columnIndex) throws ParseException;
    }
    
    /**
     * FieldParser that interprets the field text as a Java double, ignoring the
     * column index.  A NumberFormatException is thrown for text that is not a number.
     */
    public static final FieldParser DOUBLE_PARSER = new FieldParser() {
        public final double parseField( String field, int columnIndex ) {
            final double value= Double.valueOf( field ).doubleValue();
            return value;
        }
    };
    
    /**
     * FieldParser that parses the field text with the units assigned to its column
     * in the enclosing AsciiParser, and returns the value expressed in those units.
     */
    public final FieldParser UNITS_PARSER = new FieldParser() {
        public final double parseField( String field, int columnIndex ) throws ParseException {
            final Units columnUnits= AsciiParser.this.units[columnIndex];
            return columnUnits.parse( field ).doubleValue( columnUnits );
        }
    };
    
    /**
     * RecordParser that recognizes records with a regular expression.  Each capture
     * group of the expression is one field; field values are parsed with the units
     * of the enclosing AsciiParser.
     */
    public final class RegexParser implements RecordParser {
        Pattern recordPattern;
        int fieldCount;
        
        /**
         * @param regex the record pattern; its capture group count becomes the field count.
         */
        public RegexParser( String regex ) {
            this.recordPattern= Pattern.compile( regex );
            Matcher probe= this.recordPattern.matcher( "" );
            this.fieldCount= probe.groupCount();
        }
        
        public int fieldCount() {
            return this.fieldCount;
        }
        
        /**
         * Returns true, after storing every field in the builder, when the whole line
         * matches the record pattern and at least one field is a valid Java double.
         * Returns false when the line does not match, when no field is a valid double,
         * or when parsing a field with its units fails.
         */
        public final boolean tryParseRecord( String line, int irec, DataSetBuilder builder ) {
            if ( recordPattern==null ) {
                return false;
            }
            Matcher m= recordPattern.matcher( line );
            if ( !m.matches() ) {
                return false;
            }
            if ( !hasValidDouble( m ) ) {
                return false;
            }
            try {
                for ( int col=0; col<fieldCount; col++ ) {
                    Units u= units[col];
                    builder.putValue( irec, col, u.parse( m.group(col+1) ).doubleValue(u) );
                }
            } catch ( ParseException ex ) {
                return false;
            }
            return true;
        }
        
        /**
         * Checks every field group of the matched line and reports whether at least
         * one of them parses as a Java double.
         */
        private boolean hasValidDouble( Matcher m ) {
            boolean found= false;
            for ( int col=0; col<fieldCount; col++ ) {
                try {
                    Double.parseDouble( m.group(col+1) );
                    found= true;
                } catch ( NumberFormatException e ) {
                    // not a number; keep looking at the remaining fields
                }
            }
            return found;
        }
        
    }
    
    /**
     * RecordParser that cuts each line into fixed-position columns and parses each
     * column with its own FieldParser.
     */
    public static final class FixedColumnsParser implements RecordParser {
        /** start position of each column within the line. */
        int[] columnOffsets;
        /** width in characters of each column. */
        int[] columnWidths;
        /** parser used for each column. */
        FieldParser[] parsers;

        private int fieldCount;
        
        /**
         * @param columnOffsets start position of each column; its length is the field count.
         * @param columnWidths width of each column.
         * @param parsers parser for each column.
         */
        public FixedColumnsParser( int[] columnOffsets, int[] columnWidths, FieldParser[] parsers ) {
            this.columnOffsets= columnOffsets;
            this.columnWidths= columnWidths;
            this.parsers= parsers;
            this.fieldCount= columnOffsets.length;
        }
        
        public int fieldCount() {
            return fieldCount;
        }
        
        /**
         * Parses each column of the line and stores the values that parse in the builder.
         * A column that lies beyond the end of the line, or whose text cannot be parsed,
         * counts as a failure; a column cut short by the end of the line is parsed from
         * the text that is present.
         *
         * @return false when every column failed, so the line does not appear to be a
         *   record; true otherwise.  When only some columns fail, the line and a marker
         *   under the failing columns are written to System.err and true is returned.
         */
        public final boolean tryParseRecord( String line, int irec, DataSetBuilder builder ) {
            boolean[] fails= new boolean[fieldCount];
            int failCount=0;
            
            for ( int i=0; i<fieldCount; i++ ) {
                // TODO: support for TimeLocationUnits.
                int start= columnOffsets[i];
                // clamp to the line length so that short lines do not throw
                int end= Math.min( start+columnWidths[i], line.length() );
                if ( start>=end ) {
                    failCount++;
                    fails[i]= true;
                    continue;
                }
                try {
                    if ( parsers[i]==null ) {
                        System.err.println("here: "+i+"  "+fieldCount);
                    }
                    double d= parsers[i].parseField( line.substring( start, end ), i );
                    builder.putValue( irec, i, d );
                } catch ( NumberFormatException ex ) {
                    failCount++;
                    fails[i]= true;
                } catch ( ParseException ex) {
                    failCount++;
                    fails[i]= true;
                }
            }

            if ( fieldCount>0 && failCount==fieldCount ) {
                // nothing parsed: this is a header, comment or blank line, not a record
                return false;
            }

            if ( failCount>0 ) {
                System.err.println("error(s) parsing record number " +(irec) + ": ");
                System.err.println(line);
                char[] lineMarker= new char[ columnOffsets[fieldCount-1] + columnWidths[fieldCount-1] ];
                // start from blanks; a fresh char array holds NUL characters
                java.util.Arrays.fill( lineMarker, ' ' );
                for ( int i=0; i<fieldCount; i++ ) {
                    if (fails[i]) {
                        for ( int j=0; j<columnWidths[i]; j++ ) {
                            lineMarker[ j+columnOffsets[i] ]= '-';
                        }
                    }
                }
                System.err.println( new String( lineMarker ) );
            }
            
            return true;
        }
        
    }
    
    /**
     * Parse the file using the current settings.  The monitor's task size is set to
     * the file length, and the file is closed before this method returns, whether or
     * not parsing succeeds.
     *
     * @param filename the file to read.
     * @param mon monitor that receives progress updates.
     * @return a rank 2 dataset.
     * @throws IOException if the file cannot be opened or read.
     */
    public DataSet readFile( String filename, DasProgressMonitor mon ) throws IOException {
        long size= new File(filename).length();
        mon.setTaskSize(size);
        InputStream in= new FileInputStream( filename );
        try {
            return readStream( in, mon );
        } finally {
            // release the file handle even when parsing fails
            in.close();
        }
    }
    
    /**
     * Simple manual test: parses a five-field file, then prints the elapsed time in
     * milliseconds and the value of the "Frequency" property.
     *
     * @param args optional; args[0] is the file to parse.  When absent, the
     *   developer's original hard-coded test file is used.
     */
    public static void main(String[] args) throws Exception {
        
        String file= args.length>0 ? args[0] : "L:/ct/virbo/autoplot/data/2490lintest90005.dat";
        
        AsciiParser parser= AsciiParser.newParser(5);
        parser.setPropertyPattern(Pattern.compile("\\s*(.+)\\s*\\:\\s*(.+)\\s*"));
        long t0= System.currentTimeMillis();
        DataSet ds= parser.readFile( file, new NullProgressMonitor() );
        System.out.println(""+(System.currentTimeMillis()-t0));
        System.out.println(ds.property("Frequency"));
        System.out.flush();
        
    }
    
    /**
     * Creates a new instance of AsciiParser with nothing configured.  This constructor
     * assigns no fields, so fieldNames, units and recordParser keep their default null
     * values until something else sets them.
     * NOTE(review): getUnits and setUnits index into units directly, and readStream calls
     * recordParser.fieldCount(), so these presumably fail on an instance built this
     * way until it is configured — confirm.
     */
    public AsciiParser() {
    }

    /**
     * Holds value of property keepFileHeader.  When true, readStream collects the
     * skipped leading lines into the text stored under PROPERTY_FILE_HEADER.
     */
    private boolean keepFileHeader;

    /**
     * Utility field used by bound properties: holds the registered listeners and
     * delivers the change events fired by the setters of this class.
     */
    private java.beans.PropertyChangeSupport propertyChangeSupport =  new java.beans.PropertyChangeSupport(this);

    /**
     * Registers a listener to be told about changes to this parser's bound properties.
     * @param l The listener to add.
     */
    public void addPropertyChangeListener(java.beans.PropertyChangeListener l) {
        this.propertyChangeSupport.addPropertyChangeListener( l );
    }

    /**
     * Unregisters a listener so that it no longer hears about property changes.
     * @param l The listener to remove.
     */
    public void removePropertyChangeListener(java.beans.PropertyChangeListener l) {
        this.propertyChangeSupport.removePropertyChangeListener( l );
    }

    /**
     * Getter for property keepFileHeader.
     * @return true if the skipped file header lines are kept.
     */
    public boolean isKeepFileHeader() {
        return keepFileHeader;
    }

    /**
     * Setter for property keepFileHeader.  By default false, but if true, the file header
     * ignored by skipLines is put into the property PROPERTY_FILE_HEADER.  Listeners
     * are notified with the event name "keepHeader" when the value actually changes.
     *
     * @param keepHeader New value of the property.
     */
    public void setKeepFileHeader(boolean keepHeader) {
        final boolean previous= this.keepFileHeader;
        this.keepFileHeader= keepHeader;
        // the boolean overload boxes both values and, like the Object form, skips equal values
        propertyChangeSupport.firePropertyChange( "keepHeader", previous, keepHeader );
    }

    /**
     * Holds value of property recordParser: the delegate that decides whether a line
     * is a record and loads its fields into the builder.
     */
    private RecordParser recordParser;

    /**
     * Getter for property recordParser.
     * @return the record parser currently in use.
     */
    public RecordParser getRecordParser() {
        return recordParser;
    }

    /**
     * Setter for property recordParser.  Listeners are notified with the event
     * name "recordParser", carrying the previous and the new parser.
     * @param recordParser New value of property recordParser.
     */
    public void setRecordParser(RecordParser recordParser) {
        final RecordParser previous= this.recordParser;
        this.recordParser= recordParser;
        propertyChangeSupport.firePropertyChange( "recordParser", previous, recordParser );
    }

    /**
     * Indexed getter for property units.
     * @param index Index of the field.
     * @return the units assigned to the field at <CODE>index</CODE>.
     */
    public Units getUnits(int index) {
        final Units fieldUnits= this.units[index];
        return fieldUnits;
    }

    /**
     * Indexed setter for property units.  A "units" change event with null old and
     * new values is fired after the assignment.
     * @param index Index of the field.
     * @param units New units for the field at <CODE>index</CODE>.
     */
    public void setUnits(int index, Units units) {
        this.units[index]= units;
        propertyChangeSupport.firePropertyChange( "units", null, null );
    }
    
}
