001/*
002 * This file is part of McIDAS-V
003 *
004 * Copyright 2007-2026
005 * Space Science and Engineering Center (SSEC)
006 * University of Wisconsin - Madison
007 * 1225 W. Dayton Street, Madison, WI 53706, USA
008 * https://www.ssec.wisc.edu/mcidas/
009 * 
010 * All Rights Reserved
011 * 
012 * McIDAS-V is built on Unidata's IDV and SSEC's VisAD libraries, and
013 * some McIDAS-V source code is based on IDV and VisAD source code.  
014 * 
015 * McIDAS-V is free software; you can redistribute it and/or modify
016 * it under the terms of the GNU Lesser Public License as published by
017 * the Free Software Foundation; either version 3 of the License, or
018 * (at your option) any later version.
019 * 
020 * McIDAS-V is distributed in the hope that it will be useful,
021 * but WITHOUT ANY WARRANTY; without even the implied warranty of
022 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
023 * GNU Lesser Public License for more details.
024 * 
025 * You should have received a copy of the GNU Lesser Public License
026 * along with this program.  If not, see https://www.gnu.org/licenses/.
027 */
028package edu.wisc.ssec.mcidasv.data.hydra;
029
030import static edu.wisc.ssec.mcidasv.data.StatsTable.fmtMe;
031import static java.util.Arrays.asList;
032
033import java.rmi.RemoteException;
034import java.util.ArrayList;
035import java.util.Collection;
036import java.util.Collections;
037import java.util.EnumSet;
038import java.util.HashSet;
039import java.util.List;
040import java.util.Set;
041
042import org.apache.commons.math3.random.EmpiricalDistribution;
043import org.apache.commons.math3.stat.correlation.PearsonsCorrelation;
044import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
045import org.apache.commons.math3.stat.StatUtils;
046import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
047
048import visad.Data;
049import visad.FlatField;
050import visad.FunctionType;
051import visad.MathType;
052import visad.Real;
053import visad.RealTuple;
054import visad.RealTupleType;
055import visad.RealType;
056import visad.TupleType;
057import visad.VisADException;
058import visad.python.JPythonMethods;
059
060/**
061 * Used to obtain various commonly used statistics for VisAD 
062 * {@link FlatField FlatFields}.
063 */
064public class Statistics {
065    
066    /** 
067     * Various types of statistics reported by the 
068     * {@link #describe(Object...)} {@literal "function"}. 
069     */
070    enum DescribeParams {
071        HISTOGRAM,
072        LENGTH,
073        MIN,
074        MAX,
075        RANGE,
076        Q1,
077        Q2,
078        Q3,
079        IQR,
080        MEAN,
081        MODE,
082        KURTOSIS,
083        SKEWNESS,
084        STDDEV,
085        VARIANCE,
086        GOODPTS
087    }
088    
089    /** 
090     * Characters used to create {@literal "sparklines"}.
091     */
092    private static final List<Character> CHARS =
093        asList('\u2581', '\u2582', '\u2583', '\u2584', '\u2585', '\u2586',
094               '\u2587', '\u2588');
095    
096    DescriptiveStatistics[] descriptiveStats = null;
097    
098    double[][] values_x;
099    double[][] rngVals;
100    
101    int rngTupLen;
102    
103    int numPoints;
104    
105    int[] numGoodPoints;
106    
107    MathType statType;
108    
109    PearsonsCorrelation pCorrelation = null;
110
111    private static final String FLAG_NO_NAV = "NO_NAV";
112    
113    public Statistics(FlatField fltFld) throws VisADException {
114        rngVals = fltFld.getValues(false);
115        rngTupLen = rngVals.length;
116        numPoints = fltFld.getDomainSet().getLength();
117        numGoodPoints = new int[rngTupLen];
118        
119        values_x = new double[rngTupLen][];
120        
121        for (int k = 0; k < rngTupLen; k++) {
122            values_x[k] = removeMissing(rngVals[k]);
123            numGoodPoints[k] = values_x[k].length;
124        }
125        
126        descriptiveStats = new DescriptiveStatistics[rngTupLen];
127        for (int k = 0; k < rngTupLen; k++) {
128            descriptiveStats[k] = new DescriptiveStatistics(values_x[k]);
129        }
130        
131        MathType rangeType = ((FunctionType) fltFld.getType()).getRange();
132        
133        if (rangeType instanceof RealTupleType) {
134            RealType[] rttypes = ((TupleType) rangeType).getRealComponents();
135            if (rngTupLen > 1) {
136                statType = new RealTupleType(rttypes);
137            } else {
138                statType = rttypes[0];
139            }
140        } else if (rangeType instanceof RealType) {
141            statType = rangeType;
142        } else {
143            throw new VisADException("fltFld must be RealTupleType or RealType");
144        }
145        
146        pCorrelation = new PearsonsCorrelation();
147    }
148    
149    /**
150     * Get the number of points in the domain of the {@link FlatField}.
151     *
152     * @return Number of points.
153     */
154    public int numPoints() {
155        return numPoints;
156    }
157    
158    /**
159     * Get the number of non-missing points in each range component.
160     *
161     * @return Number of non-missing points.
162     */
163    public int[] getNumGoodPoints() {
164        return numGoodPoints;
165    }
166    
167    /** 
168     * Get the original range values.
169     *
170     * @return Original range values.
171     */
172    public double[][] getRngVals() {
173        return rngVals;
174    }
175    
176    /** 
177     * Get the range values actually used (missing removed).
178     * 
179     * @return Range values used.
180     */
181    public double[][] getValues() {
182        return values_x;
183    }
184    
185    private double[] removeMissing(double[] vals) {
186        int num = vals.length;
187        int cnt = 0;
188        int[] good = new int[num];
189        for (int k = 0; k < num; k++) {
190            if (!(Double.isNaN(vals[k]))) {
191                good[cnt] = k;
192                cnt++;
193            }
194        }
195        
196        if (cnt == num) {
197            return vals;
198        }
199        
200        double[] newVals = new double[cnt];
201        for (int k = 0; k < cnt; k++) {
202            newVals[k] = vals[good[k]];
203        }
204        
205        return newVals;
206    }
207    
208    private double[][] removeMissing(double[][] vals) {
209        int tupLen = vals.length;
210        double[][] newVals = new double[tupLen][];
211        for (int k = 0; k < tupLen; k++) {
212            newVals[k] = removeMissing(vals[k]);
213        }
214        return newVals;
215    }
216    
217    public Data mean() throws VisADException, RemoteException {
218        double[] stats = new double[rngTupLen];
219        for (int k = 0; k < rngTupLen; k++) {
220            stats[k] = descriptiveStats[k].getMean();
221        }
222        return makeStat(stats);
223    }
224    
225    public Data geometricMean() throws VisADException, RemoteException {
226        double[] stats = new double[rngTupLen];
227        for (int k = 0; k < rngTupLen; k++) {
228            stats[k] = descriptiveStats[k].getGeometricMean();
229        }
230        return makeStat(stats);
231    }
232    
233    
234    public Data max() throws VisADException, RemoteException {
235        double[] stats = new double[rngTupLen];
236        for (int k = 0; k < rngTupLen; k++) {
237            stats[k] = descriptiveStats[k].getMax();
238        }
239        return makeStat(stats);
240    }
241    
242    public Data min() throws VisADException, RemoteException {
243        double[] stats = new double[rngTupLen];
244        for (int k = 0; k < rngTupLen; k++) {
245            stats[k] = descriptiveStats[k].getMin();
246        }
247        return makeStat(stats);
248    }
249    
250    public Data median() throws VisADException, RemoteException {
251        double[] stats = new double[rngTupLen];
252        for (int k = 0; k < rngTupLen; k++) {
253            stats[k] = descriptiveStats[k].getPercentile(50.0);
254        }
255        return makeStat(stats);
256    }
257    
258    public Data percentile(double p) throws VisADException, RemoteException {
259        double[] stats = new double[rngTupLen];
260        for (int k = 0; k < rngTupLen; k++) {
261            stats[k] = descriptiveStats[k].getPercentile(p);
262        }
263        return makeStat(stats);
264    }
265    
266    public Data variance() throws VisADException, RemoteException {
267        double[] stats = new double[rngTupLen];
268        for (int k = 0; k < rngTupLen; k++) {
269            stats[k] = descriptiveStats[k].getVariance();
270        }
271        return makeStat(stats);
272    }
273    
274    public Data kurtosis() throws VisADException, RemoteException {
275        double[] stats = new double[rngTupLen];
276        for (int k = 0; k < rngTupLen; k++) {
277            stats[k] = descriptiveStats[k].getKurtosis();
278        }
279        return makeStat(stats);
280    }
281    
282    public Data standardDeviation() throws VisADException, RemoteException {
283        double[] stats = new double[rngTupLen];
284        for (int k = 0; k < rngTupLen; k++) {
285            stats[k] = descriptiveStats[k].getStandardDeviation();
286        }
287        return makeStat(stats);
288    }
289    
290    public Data skewness() throws VisADException, RemoteException {
291        double[] stats = new double[rngTupLen];
292        for (int k = 0; k < rngTupLen; k++) {
293            stats[k] = descriptiveStats[k].getSkewness();
294        }
295        return makeStat(stats);
296    }
297    
298    public Data correlation(FlatField fltFld)
299        throws VisADException, RemoteException {
300        double[][] values_x = this.rngVals;
301        double[][] values_y = fltFld.getValues(false);
302        
303        if (values_y.length != rngTupLen) {
304            throw new VisADException("fields must have same range tuple length");
305        }
306        
307        double[] stats = new double[rngTupLen];
308        
309        for (int k = 0; k < rngTupLen; k++) {
310            double[][] newVals = removeMissingAND(values_x[k], values_y[k]);
311            stats[k] = pCorrelation.correlation(newVals[0], newVals[1]);
312        }
313        
314        return makeStat(stats);
315    }
316    
317    private Data makeStat(double[] stats)
318        throws VisADException, RemoteException {
319        if (statType instanceof RealType) {
320            return new Real((RealType) statType, stats[0]);
321        } else if (statType instanceof RealTupleType) {
322            return new RealTuple((RealTupleType) statType, stats);
323        }
324        return null;
325    }
326    
327    private double[][] removeMissingAND(double[] vals_x, double[] vals_y) {
328        int cnt = 0;
329        int[] good = new int[vals_x.length];
330        for (int k = 0; k < vals_x.length; k++) {
331            if (!(Double.isNaN(vals_x[k])) && !(Double.isNaN(vals_y[k]))) {
332                good[cnt] = k;
333                cnt++;
334            }
335        }
336        
337        if (cnt == vals_x.length) {
338            return new double[][]{vals_x, vals_y};
339        } else {
340            double[] newVals_x = new double[cnt];
341            double[] newVals_y = new double[cnt];
342            for (int k = 0; k < cnt; k++) {
343                newVals_x[k] = vals_x[good[k]];
344                newVals_y[k] = vals_y[good[k]];
345            }
346            return new double[][]{newVals_x, newVals_y};
347        }
348    }
349    
350    /**
351     * Creates a {@literal "description"} of any {@link FlatField FlatFields} 
352     * in {@code params}.
353     * 
354     * <p>This is mostly useful from within the Jython Shell.</p>
355     * 
356     * <p>Some notes about {@code params}:
357     * <ul>
358     *     <li>Understands {@code FlatField} and {@code String} objects.</li>
359     *     <li>Strings must be found within {@link DescribeParams}.</li>
360     *     <li>Strings control descriptions returned for all fields in 
361     *     {@code params}.</li>
362     *     <li>{@code FlatField} and {@code String} objects may appear in any order.</li>
363     * </ul>
364     * </p>
365     * 
366     * @param params See description of this method. If {@code null} or empty,
367     *               nothing will happen.
368     * 
369     * @return String descriptions of any {@code FlatField} objects in 
370     *         {@code params}, with relevant strings in {@code params} 
371     *         controlling what shows up in all descriptions.
372     * 
373     * @throws VisADException if VisAD had problems.
374     * @throws RemoteException if VisAD had problems.
375     */
376    public static String describe(Object... params)
377        throws VisADException, RemoteException {
378        String result = "";
379        if (params != null) {
380            List<FlatField> fields = new ArrayList<>(params.length);
381            List<String> descs = new ArrayList<>(params.length);
382            for (Object param : params) {
383                if (param instanceof FlatField) {
384                    fields.add((FlatField) param);
385                } else if (param instanceof String) {
386                    descs.add((String) param);
387                }
388            }
389            
390            // 350 is typical size of a single, "complete" describe call with
391            // one field.
392            StringBuilder buf = new StringBuilder(350 * descs.size());
393            for (FlatField field : fields) {
394                Description d = new Description(field, descs);
395                buf.append(d.makeDescription());
396            }
397            result = buf.toString();
398        }
399        return result;
400    }
401    
402    /**
403     * Creates a {@literal "binned sparkline"} of the given 
404     * {@link FlatField FlatFields}.
405     * 
406     * @param fields {@code FlatField} objects to {@literal "visualize"} with 
407     *               sparklines.
408     * 
409     * @return String sparkline for each {@code FlatField} in {@code fields}. 
410     * 
411     * @throws VisADException if VisAD had problems.
412     * @throws RemoteException if VisAD had problems.
413     * 
414     * @see <a href="https://en.wikipedia.org/wiki/Sparkline" target="_top">Sparkline Wikipedia Article</a>
415     */
416    public static String sparkline(FlatField... fields)
417        throws VisADException, RemoteException
418    {
419        // assuming sparkline is only using 20 bins
420        StringBuilder sb = new StringBuilder(25 * fields.length);
421        for (FlatField field : fields) {
422            Statistics s = new Statistics(field);
423            sb.append(sparkline(field, s)).append('\n');
424        }
425        return sb.toString();
426    }
427    
428    public static Long[] histogram(FlatField field, int bins)
429        throws VisADException {
430        Long[] histogram = new Long[bins];
431        EmpiricalDistribution distribution = new EmpiricalDistribution(bins);
432        distribution.load(field.getValues(false)[0]);
433        int k = 0;
434        for (SummaryStatistics stats : distribution.getBinStats()) {
435            histogram[k++] = stats.getN();
436        }
437        return histogram;
438    }
439    
440    public static String sparkline(FlatField field, Statistics s)
441        throws VisADException, RemoteException 
442    {
443        Long[] values = histogram(field, 20);
444        Real sMin = (Real) s.min();
445        Real sMax = (Real) s.max();
446        Collection<Long> collection = asList(values);
447        long max = Collections.max(collection);
448        long min = Collections.min(collection);
449        float scale = (max - min) / 7f;
450        final StringBuilder buf = new StringBuilder(values.length);
451        
452        // TJJ Mar 2018 - sandwich with min/max
453        // http://mcidas.ssec.wisc.edu/inquiry-v/?inquiry=2548
454        buf.append(fmtMe((sMin).getValue()));
455        for (Long value : values) {
456            int index = Math.round((value - min) / scale);
457            buf.append(CHARS.get(index));
458        }
459        buf.append(fmtMe((sMax).getValue()));
460        
461        return buf.toString();
462    }
463
464    private static class DescribeConfig {
465            EnumSet<DescribeParams> params;
466            boolean useOffEarth = true; // default = current behavior
467        }
468    
469        private static DescribeConfig parseParams(List<String> ps) {
470        DescribeConfig config = new DescribeConfig();
471
472        Set<DescribeParams> paramSet = Collections.emptySet();
473
474        if (ps != null) {
475            paramSet = new HashSet<>(ps.size());
476
477            for (String p : ps) {
478                String up = p.toUpperCase();
479
480                // --- behavior flag ---
481                if (FLAG_NO_NAV.equals(up)) {
482                    config.useOffEarth = false;
483                    continue;
484                }
485
486                // --- statistical params ---
487                paramSet.add(DescribeParams.valueOf(up));
488            }
489        }
490
491        if (paramSet.isEmpty()) {
492            config.params = EnumSet.allOf(DescribeParams.class);
493        } else {
494            config.params = EnumSet.copyOf(paramSet);
495        }
496
497        return config;
498    }
499    
500    public static class Description {
501        
502        private final FlatField field;
503        
504        private final EnumSet<DescribeParams> params;
505        private final boolean useOffEarth;
506        
507        public Description(FlatField field, List<String> params) {
508            this.field = field;
509
510            DescribeConfig cfg = parseParams(params);
511
512            this.params = cfg.params;
513            this.useOffEarth = cfg.useOffEarth;
514        }
515        
516        public String makeDescription()
517            throws VisADException, RemoteException {
518            StringBuilder sb = new StringBuilder(1024);
519            FlatField workingField = field;
520
521            if (!useOffEarth) {
522                workingField = JPythonMethods.setMissingNoNavigation(field);
523            }
524
525            Statistics s = new Statistics(workingField);
526            int originalGood = new Statistics(field).getNumGoodPoints()[0];
527            int filteredGood = s.getNumGoodPoints()[0];
528            int navMasked = originalGood - filteredGood;                        
529            double max = ((Real) s.max()).getValue();
530            double min = ((Real) s.min()).getValue();
531            double q1 = ((Real) s.percentile(25.0)).getValue();
532            double q3 = ((Real) s.percentile(75.0)).getValue();
533            double[] modes = StatUtils.mode(workingField.getValues(false)[0]);
534            
535            StringBuilder tmp = new StringBuilder(128);
536            for (int i = 0; i < modes.length; i++) {
537                tmp.append(fmtMe(modes[i]));
538                if ((i + 1) < modes.length) {
539                    tmp.append(", ");
540                }
541            }
542            
543            String temp;
544            char endl = '\n';
545            if (params.contains(DescribeParams.HISTOGRAM)) {
546                temp = sparkline(field, s);
547                sb.append("Histogram   :  ").append(temp).append(endl);
548            }
549            if (params.contains(DescribeParams.LENGTH)) {
550                temp = String.format("%d", s.numPoints());
551                sb.append("Length      :  ").append(temp).append(endl);
552            }
553            if (params.contains(DescribeParams.MIN)) {
554                temp = fmtMe(((Real) s.min()).getValue());
555                sb.append("Min         :  ").append(temp).append(endl);
556            }
557            if (params.contains(DescribeParams.MAX)) {
558                temp = fmtMe(((Real) s.max()).getValue());
559                sb.append("Max         :  ").append(temp).append(endl);
560            }
561            if (params.contains(DescribeParams.RANGE)) {
562                temp = fmtMe(max - min);
563                sb.append("Range       :  ").append(temp).append(endl);
564            }
565            if (params.contains(DescribeParams.Q1)) {
566                sb.append("Q1          :  ").append(fmtMe(q1)).append(endl);
567            }
568            if (params.contains(DescribeParams.Q2)) {
569                temp = fmtMe(((Real) s.percentile(50.0)).getValue());
570                sb.append("Q2          :  ").append(temp).append(endl);
571            }
572            if (params.contains(DescribeParams.Q3)) {
573                sb.append("Q3          :  ").append(fmtMe(q3)).append(endl);
574            }
575            if (params.contains(DescribeParams.IQR)) {
576                temp = fmtMe(q3 - q1);
577                sb.append("IQR         :  ").append(temp).append(endl);
578            }
579            if (params.contains(DescribeParams.MEAN)) {
580                temp = fmtMe(((Real) s.mean()).getValue());
581                sb.append("Mean        :  ").append(temp).append(endl);
582            }
583            if (params.contains(DescribeParams.MODE)) {
584                sb.append("Mode        :  ").append(tmp).append(endl);
585            }
586            if (params.contains(DescribeParams.KURTOSIS)) {
587                temp = fmtMe(((Real) s.kurtosis()).getValue());
588                sb.append("Kurtosis    :  ").append(temp).append(endl);
589            }
590            if (params.contains(DescribeParams.SKEWNESS)) {
591                temp = fmtMe(((Real) s.skewness()).getValue());
592                sb.append("Skewness    :  ").append(temp).append(endl);
593            }
594            if (params.contains(DescribeParams.STDDEV)) {
595                temp = fmtMe(((Real) s.standardDeviation()).getValue());
596                sb.append("Std Dev     :  ").append(temp).append(endl);
597            }
598            if (params.contains(DescribeParams.VARIANCE)) {
599                temp = fmtMe(((Real) s.variance()).getValue());
600                sb.append("Variance    :  ").append(temp).append(endl);
601            }
602            if (params.contains(DescribeParams.GOODPTS)) {
603                temp = String.format("%d", s.getNumGoodPoints()[0]);
604                sb.append("# Good Pts  :  ").append(temp).append(endl);
605            }
606            if (!useOffEarth) {
607                sb.append("# Nav Masked:  ").append(navMasked).append('\n');
608            }
609            return sb.toString();
610        }
611    }
612}