#!/usr/bin/env python
"""Make a LaTeX summary of many CDFs"""

#Comments/caveats:
#1) Obviously the formatting is a bit rough and can always be updated
#2) This exposes some deficiencies in our metadata that could be fixed
#3) We do have some metadata updates prepared but not rolled out, so this
#   is already a little out of date
#4) This can be run against empty masters for EPI-Hi, but for EPI-Lo
#   it's best to run against an actual data file that has as many variables
#   populated as possible. This is because the energy variables can depend
#   on time and don't get reasonable values unless there's actual data.
#5) Similarly the summary CDFs have no masters
#6) Output name is hard-coded (cdf_summary.tex); the secondary output is
#   cdf_summary_varlist.tex (list of all variables and links to them)
#Reasonable way to run:
#python ~/scm/isois/developer/scripts/cdf_to_latex.py /home/share/data/ISOIS/level2/psp_isois_l2-summary_20190401_v1.2.0.cdf /home/share/data/EPILo/level2/psp_isois-epilo_l2-ic_20190401_v1.18.0.cdf /home/share/data/EPILo/level2/psp_isois-epilo_l2-pc_20190401_v1.15.0.cdf /home/share/data/EPILo/level2/psp_isois-epilo_l2-ie_20190401_v1.15.0.cdf /home/share/data/EPILo/level2/psp_isois-epilo_l2-pe_20190401_v1.4.0.cdf /home/share/data/EPIHi/masters/level2/*.cdf

import collections
import datetime
import functools
import re
import sys

import numpy
import spacepy.pycdf


#https://stackoverflow.com/questions/16259923/how-can-i-escape-latex-special-characters-inside-django-templates
def tex_escape(text):
    """Escape the characters LaTeX treats specially

        :param text: a plain text message
        :return: the message escaped to appear correctly in LaTeX
    """
    #Every key is a single character, so one character class matches them all
    replacements = {
        '\\': r'\textbackslash{}',
        '{': r'\{',
        '}': r'\}',
        '&': r'\&',
        '%': r'\%',
        '$': r'\$',
        '#': r'\#',
        '_': r'\_',
        '~': r'\textasciitilde{}',
        '^': r'\^{}',
        '<': r'\textless{}',
        '>': r'\textgreater{}',
    }
    char_class = ''.join(re.escape(char) for char in replacements)
    special = re.compile('[' + char_class + ']')

    def substitute(match):
        """Look up the LaTeX form of the single matched character"""
        return replacements[match.group()]

    #Single pass, so backslashes/braces in the replacements are not re-escaped
    return special.sub(substitute, text)


class CDFtoLaTeX(object):
    """Write a LaTeX summary of the variables in one or more CDF files

    :meth:`main` writes two files in the current directory:
    ``cdf_summary.tex`` (one subsection per CDF, with a paragraph per
    variable) and ``cdf_summary_varlist.tex`` (per logical source, a list of
    hyperlinks to the variable paragraphs that were written).
    """

    #Prefixes of variable attributes whose values name other variables
    _REFERENCE_PREFIXES = (
        'DEPEND_', 'DELTA_PLUS_VAR', 'DELTA_MINUS_VAR', 'LABL_PTR_')

    #Things to change out in units, primarily the IDL escapes
    #Raw strings: \m is not a valid Python escape sequence
    _UNIT_REPLACEMENTS = {
        'cm!E-2!Nsr!E-1!Nsec!E-1!NkeV!E-1!N':
        r'$\mathrm{cm^{-2}sr^{-1}sec^{-1}keV^{-1}}$',
        '1/(cm^2 sr sec MeV)':
        r'$\mathrm{cm^{-2}sr^{-1}sec^{-1}MeV^{-1}}$',
    }

    def __init__(self, *cdf_paths):
        """Initialize; just record the paths

        :param cdf_paths: paths of the CDF files to summarize, in the
                          order they should appear in the output
        """
        #All the variables written, indexed by logical ID
        self.vars_written = collections.defaultdict(list)
        self.cdf_paths = cdf_paths

    def main(self):
        """Make the LaTeX summary, entire file

        Writes ``cdf_summary.tex`` from every path given to the constructor,
        then ``cdf_summary_varlist.tex`` from the variables recorded while
        doing so.
        """
        #print_cdf and print_var write to self.outfile, so bind it here
        with open('cdf_summary.tex', 'wt') as self.outfile:
            for fspec in self.cdf_paths:
                with spacepy.pycdf.CDF(fspec) as f:
                    self.print_cdf(f)
        with open('cdf_summary_varlist.tex', 'wt') as summaryfile:
            for logical in sorted(self.vars_written):
                summaryfile.write(r'\subsection{{{}}}'.format(
                    tex_escape(logical)) + '\n')
                for variable in sorted(self.vars_written[logical]):
                    #Link target is the unescaped label written by print_var
                    summaryfile.write(
                        r'\hyperref[par:{}_{}]{{{}}}'.format(
                            logical, variable, tex_escape(variable))
                        + '\\\\\n')

    @staticmethod
    def _is_primary(var):
        """True if PRIMARY_VAR is one of the variable's VARIABLE_PURPOSE"""
        attrs = var.attrs
        return 'VARIABLE_PURPOSE' in attrs \
            and 'PRIMARY_VAR' in attrs['VARIABLE_PURPOSE'].split(',')

    def print_cdf(self, cdffile):
        """Print out summary of one CDF

        Writes a subsection heading, selected global attributes, and then
        the variables in three groups: primary variables, other data, and
        other support data. Must be called while ``self.outfile`` is open
        (as :meth:`main` arranges).

        :param cdffile: open CDF to summarize
        """
        logical_source = cdffile.attrs['Logical_source'][0]
        self.outfile.write(r'\subsection{{{}}}'.format(
            tex_escape(logical_source)))
        self.outfile.write('\n')
        #global attrs
        for a in ('Descriptor', 'Data_type', 'TEXT',
                  'Time_resolution',
                  'Acknowledgement', 'Rules_of_use'):
            for l in cdffile.attrs[a]:
                self.outfile.write(tex_escape(l) + r'\\' + '\n')
        #All variables referenced from DEPENDs, so
        #shouldn't get a top-level writeup
        from_deps = {
            var.attrs[a]
            for var in cdffile.values()
            if var.attrs['VAR_TYPE'] in ('support_data', 'data')
            for a in var.attrs
            if a.startswith(self._REFERENCE_PREFIXES)}
        variables = [cdffile[v]
                     for v in sorted(v for v in cdffile if v not in from_deps)]
        #Main ones first, then all "data", then support data. A VAR_TYPE
        #of None marks the section that takes the primary variables.
        sections = (
            (r'\subsubsection{Primary variables}', None),
            (r'\subsubsection{Other data}', 'data'),
            (r'\subsubsection{Other support}', 'support_data'),
        )
        for heading, var_type in sections:
            self.outfile.write(heading + '\n')
            for var in variables:
                if self._is_primary(var):
                    wanted = var_type is None
                else:
                    wanted = var_type is not None \
                             and var.attrs['VAR_TYPE'] == var_type
                if wanted:
                    self.print_var(var, logical_source)
        #Some sort of footer?

    def print_var(self, var, logical_source=None):
        """Print out the paragraph describing one variable

        Writes the name (from FIELDNAM), description, units, size, whether
        it is time-varying, DICT_KEY and VAR_NOTES if present, and a line
        for each non-character dependency other than DEPEND_0. Must be
        called while ``self.outfile`` is open.

        :param var: CDF variable to describe
        :param logical_source: if not None, a ``\\label`` is written for the
                               paragraph and the variable is recorded in
                               ``vars_written`` under this key
        """
        attrs = var.attrs
        varname = attrs['FIELDNAM']
        self.outfile.write(r'\paragraph{{{}}}'.format(
            tex_escape(varname)) + '\n')
        if logical_source is not None:
            self.outfile.write(
                r'\label{{par:{}_{}}}'.format(logical_source, varname) + '\n')
            self.vars_written[logical_source].append(varname)
        if 'CATDESC' in attrs:
            self.outfile.write(tex_escape(attrs['CATDESC']) + '\n')
        if 'UNITS' in attrs and attrs['UNITS'].strip():
            units = attrs['UNITS'].strip()
            self.outfile.write(r'({})'.format(
                self._UNIT_REPLACEMENTS.get(units, tex_escape(units))) + '\n')
        record_varying = var.rv()
        #For record-varying variables the first dimension is the records
        dims = var.shape[1:] if record_varying else var.shape
        if len(dims) > 0:
            #Must be raw: in a normal string, \t of \times is a tab
            self.outfile.write(r'Size: $\mathrm{{{}}}$'.format(
                r' \times '.join([str(i) for i in dims])) + '\n')
        self.outfile.write(
            (r'time-varying\\' if record_varying else r'constant\\') + '\n')
        if 'DICT_KEY' in attrs:
            self.outfile.write(r'\texttt{{{}}}\\'.format(
                tex_escape(attrs['DICT_KEY'])) + '\n')
        if 'VAR_NOTES' in attrs:
            self.outfile.write(tex_escape(attrs['VAR_NOTES'])
                               + r'\\' + '\n')
        for a in sorted(attrs):
            if not a.startswith('DEPEND_') or a == 'DEPEND_0':
                continue
            self._print_depend(var.cdf_file[attrs[a]], attrs[a])

    def _print_depend(self, depvar, depname):
        """Print one line summarizing a dependency of a variable

        Writes the description, the range of the values lying within
        VALIDMIN/VALIDMAX, the units, and the number of bins. Nothing is
        written for character-type dependencies.

        :param depvar: CDF variable that is depended upon
        :param depname: name of that variable, used as the description if
                        it has no CATDESC
        """
        #Skip pure metadata
        if depvar.type() in (spacepy.pycdf.const.CDF_CHAR.value,
                             spacepy.pycdf.const.CDF_UCHAR.value):
            return
        depattrs = depvar.attrs
        depshape = depvar.shape
        self.outfile.write(
            tex_escape(depattrs['CATDESC'] if 'CATDESC' in depattrs
                       else depname) + '\n')
        depvals = depvar[...].flatten()
        if len(depvals): #actually have some values
            if 'VALIDMIN' in depattrs:
                depvals = depvals[depvals >= depattrs['VALIDMIN']]
            if 'VALIDMAX' in depattrs:
                depvals = depvals[depvals <= depattrs['VALIDMAX']]
        if len(depvals): #Still have values
            if depvals.dtype.kind == 'f':
                self.outfile.write(r'{:.0f} -- {:.0f}'.format(
                    min(depvals), max(depvals)) + '\n')
            else:
                self.outfile.write(
                    r'{} -- {}'.format(min(depvals), max(depvals)) + '\n')
            if 'UNITS' in depattrs and depattrs['UNITS'].strip():
                self.outfile.write(tex_escape(depattrs['UNITS']) + '\n')
            if len(depshape) == 1:
                depbins = len(depvals)
            else: #Hope the last dim matches, hack for EPI-Lo energy
                #Divide the value count by every leading (nonzero) dimension
                depbins = functools.reduce(
                    lambda x, d: x if d == 0 else x // d,
                    depshape[:-1], len(depvals))
            self.outfile.write('({:d} bins)'.format(depbins) + '\n')
        self.outfile.write(r'\\' + '\n')


if __name__ == '__main__':
    #Every command-line argument is the path of a CDF to summarize
    converter = CDFtoLaTeX(*sys.argv[1:])
    converter.main()