001/*
002 * This file is part of McIDAS-V
003 *
004 * Copyright 2007-2024
005 * Space Science and Engineering Center (SSEC)
006 * University of Wisconsin - Madison
007 * 1225 W. Dayton Street, Madison, WI 53706, USA
008 * https://www.ssec.wisc.edu/mcidas/
009 * 
010 * All Rights Reserved
011 * 
012 * McIDAS-V is built on Unidata's IDV and SSEC's VisAD libraries, and
013 * some McIDAS-V source code is based on IDV and VisAD source code.  
014 * 
015 * McIDAS-V is free software; you can redistribute it and/or modify
016 * it under the terms of the GNU Lesser Public License as published by
017 * the Free Software Foundation; either version 3 of the License, or
018 * (at your option) any later version.
019 * 
020 * McIDAS-V is distributed in the hope that it will be useful,
021 * but WITHOUT ANY WARRANTY; without even the implied warranty of
022 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
023 * GNU Lesser Public License for more details.
024 * 
025 * You should have received a copy of the GNU Lesser Public License
026 * along with this program.  If not, see https://www.gnu.org/licenses/.
027 */
028package edu.wisc.ssec.mcidasv.data.hydra;
029
030import static edu.wisc.ssec.mcidasv.data.StatsTable.fmtMe;
031import static java.util.Arrays.asList;
032
033import java.rmi.RemoteException;
034import java.util.ArrayList;
035import java.util.Collection;
036import java.util.Collections;
037import java.util.EnumSet;
038import java.util.HashSet;
039import java.util.List;
040import java.util.Set;
041
042import org.apache.commons.math3.random.EmpiricalDistribution;
043import org.apache.commons.math3.stat.correlation.PearsonsCorrelation;
044import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
045import org.apache.commons.math3.stat.StatUtils;
046import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
047
048import visad.Data;
049import visad.FlatField;
050import visad.FunctionType;
051import visad.MathType;
052import visad.Real;
053import visad.RealTuple;
054import visad.RealTupleType;
055import visad.RealType;
056import visad.TupleType;
057import visad.VisADException;
058
059/**
060 * Used to obtain various commonly used statistics for VisAD 
061 * {@link FlatField FlatFields}.
062 */
063public class Statistics {
064    
065    /** 
066     * Various types of statistics reported by the 
067     * {@link #describe(Object...)} {@literal "function"}. 
068     */
069    enum DescribeParams {
070        HISTOGRAM,
071        LENGTH,
072        MIN,
073        MAX,
074        RANGE,
075        Q1,
076        Q2,
077        Q3,
078        IQR,
079        MEAN,
080        MODE,
081        KURTOSIS,
082        SKEWNESS,
083        STDDEV,
084        VARIANCE,
085        GOODPTS
086    }
087    
088    /** 
089     * Characters used to create {@literal "sparklines"}.
090     */
091    private static final List<Character> CHARS =
092        asList('\u2581', '\u2582', '\u2583', '\u2584', '\u2585', '\u2586',
093               '\u2587', '\u2588');
094    
095    DescriptiveStatistics[] descriptiveStats = null;
096    
097    double[][] values_x;
098    double[][] rngVals;
099    
100    int rngTupLen;
101    
102    int numPoints;
103    
104    int[] numGoodPoints;
105    
106    MathType statType;
107    
108    PearsonsCorrelation pCorrelation = null;
109    
110    public Statistics(FlatField fltFld) throws VisADException {
111        rngVals = fltFld.getValues(false);
112        rngTupLen = rngVals.length;
113        numPoints = fltFld.getDomainSet().getLength();
114        numGoodPoints = new int[rngTupLen];
115        
116        values_x = new double[rngTupLen][];
117        
118        for (int k = 0; k < rngTupLen; k++) {
119            values_x[k] = removeMissing(rngVals[k]);
120            numGoodPoints[k] = values_x[k].length;
121        }
122        
123        descriptiveStats = new DescriptiveStatistics[rngTupLen];
124        for (int k = 0; k < rngTupLen; k++) {
125            descriptiveStats[k] = new DescriptiveStatistics(values_x[k]);
126        }
127        
128        MathType rangeType = ((FunctionType) fltFld.getType()).getRange();
129        
130        if (rangeType instanceof RealTupleType) {
131            RealType[] rttypes = ((TupleType) rangeType).getRealComponents();
132            if (rngTupLen > 1) {
133                statType = new RealTupleType(rttypes);
134            } else {
135                statType = rttypes[0];
136            }
137        } else if (rangeType instanceof RealType) {
138            statType = rangeType;
139        } else {
140            throw new VisADException("fltFld must be RealTupleType or RealType");
141        }
142        
143        pCorrelation = new PearsonsCorrelation();
144    }
145    
146    /**
147     * Get the number of points in the domain of the {@link FlatField}.
148     *
149     * @return Number of points.
150     */
151    public int numPoints() {
152        return numPoints;
153    }
154    
155    /**
156     * Get the number of non-missing points in each range component.
157     *
158     * @return Number of non-missing points.
159     */
160    public int[] getNumGoodPoints() {
161        return numGoodPoints;
162    }
163    
164    /** 
165     * Get the original range values.
166     *
167     * @return Original range values.
168     */
169    public double[][] getRngVals() {
170        return rngVals;
171    }
172    
173    /** 
174     * Get the range values actually used (missing removed).
175     * 
176     * @return Range values used.
177     */
178    public double[][] getValues() {
179        return values_x;
180    }
181    
182    private double[] removeMissing(double[] vals) {
183        int num = vals.length;
184        int cnt = 0;
185        int[] good = new int[num];
186        for (int k = 0; k < num; k++) {
187            if (!(Double.isNaN(vals[k]))) {
188                good[cnt] = k;
189                cnt++;
190            }
191        }
192        
193        if (cnt == num) {
194            return vals;
195        }
196        
197        double[] newVals = new double[cnt];
198        for (int k = 0; k < cnt; k++) {
199            newVals[k] = vals[good[k]];
200        }
201        
202        return newVals;
203    }
204    
205    private double[][] removeMissing(double[][] vals) {
206        int tupLen = vals.length;
207        double[][] newVals = new double[tupLen][];
208        for (int k = 0; k < tupLen; k++) {
209            newVals[k] = removeMissing(vals[k]);
210        }
211        return newVals;
212    }
213    
214    public Data mean() throws VisADException, RemoteException {
215        double[] stats = new double[rngTupLen];
216        for (int k = 0; k < rngTupLen; k++) {
217            stats[k] = descriptiveStats[k].getMean();
218        }
219        return makeStat(stats);
220    }
221    
222    public Data geometricMean() throws VisADException, RemoteException {
223        double[] stats = new double[rngTupLen];
224        for (int k = 0; k < rngTupLen; k++) {
225            stats[k] = descriptiveStats[k].getGeometricMean();
226        }
227        return makeStat(stats);
228    }
229    
230    
231    public Data max() throws VisADException, RemoteException {
232        double[] stats = new double[rngTupLen];
233        for (int k = 0; k < rngTupLen; k++) {
234            stats[k] = descriptiveStats[k].getMax();
235        }
236        return makeStat(stats);
237    }
238    
239    public Data min() throws VisADException, RemoteException {
240        double[] stats = new double[rngTupLen];
241        for (int k = 0; k < rngTupLen; k++) {
242            stats[k] = descriptiveStats[k].getMin();
243        }
244        return makeStat(stats);
245    }
246    
247    public Data median() throws VisADException, RemoteException {
248        double[] stats = new double[rngTupLen];
249        for (int k = 0; k < rngTupLen; k++) {
250            stats[k] = descriptiveStats[k].getPercentile(50.0);
251        }
252        return makeStat(stats);
253    }
254    
255    public Data percentile(double p) throws VisADException, RemoteException {
256        double[] stats = new double[rngTupLen];
257        for (int k = 0; k < rngTupLen; k++) {
258            stats[k] = descriptiveStats[k].getPercentile(p);
259        }
260        return makeStat(stats);
261    }
262    
263    public Data variance() throws VisADException, RemoteException {
264        double[] stats = new double[rngTupLen];
265        for (int k = 0; k < rngTupLen; k++) {
266            stats[k] = descriptiveStats[k].getVariance();
267        }
268        return makeStat(stats);
269    }
270    
271    public Data kurtosis() throws VisADException, RemoteException {
272        double[] stats = new double[rngTupLen];
273        for (int k = 0; k < rngTupLen; k++) {
274            stats[k] = descriptiveStats[k].getKurtosis();
275        }
276        return makeStat(stats);
277    }
278    
279    public Data standardDeviation() throws VisADException, RemoteException {
280        double[] stats = new double[rngTupLen];
281        for (int k = 0; k < rngTupLen; k++) {
282            stats[k] = descriptiveStats[k].getStandardDeviation();
283        }
284        return makeStat(stats);
285    }
286    
287    public Data skewness() throws VisADException, RemoteException {
288        double[] stats = new double[rngTupLen];
289        for (int k = 0; k < rngTupLen; k++) {
290            stats[k] = descriptiveStats[k].getSkewness();
291        }
292        return makeStat(stats);
293    }
294    
295    public Data correlation(FlatField fltFld)
296        throws VisADException, RemoteException {
297        double[][] values_x = this.rngVals;
298        double[][] values_y = fltFld.getValues(false);
299        
300        if (values_y.length != rngTupLen) {
301            throw new VisADException("fields must have same range tuple length");
302        }
303        
304        double[] stats = new double[rngTupLen];
305        
306        for (int k = 0; k < rngTupLen; k++) {
307            double[][] newVals = removeMissingAND(values_x[k], values_y[k]);
308            stats[k] = pCorrelation.correlation(newVals[0], newVals[1]);
309        }
310        
311        return makeStat(stats);
312    }
313    
314    private Data makeStat(double[] stats)
315        throws VisADException, RemoteException {
316        if (statType instanceof RealType) {
317            return new Real((RealType) statType, stats[0]);
318        } else if (statType instanceof RealTupleType) {
319            return new RealTuple((RealTupleType) statType, stats);
320        }
321        return null;
322    }
323    
324    private double[][] removeMissingAND(double[] vals_x, double[] vals_y) {
325        int cnt = 0;
326        int[] good = new int[vals_x.length];
327        for (int k = 0; k < vals_x.length; k++) {
328            if (!(Double.isNaN(vals_x[k])) && !(Double.isNaN(vals_y[k]))) {
329                good[cnt] = k;
330                cnt++;
331            }
332        }
333        
334        if (cnt == vals_x.length) {
335            return new double[][]{vals_x, vals_y};
336        } else {
337            double[] newVals_x = new double[cnt];
338            double[] newVals_y = new double[cnt];
339            for (int k = 0; k < cnt; k++) {
340                newVals_x[k] = vals_x[good[k]];
341                newVals_y[k] = vals_y[good[k]];
342            }
343            return new double[][]{newVals_x, newVals_y};
344        }
345    }
346    
347    /**
348     * Creates a {@literal "description"} of any {@link FlatField FlatFields} 
349     * in {@code params}.
350     * 
351     * <p>This is mostly useful from within the Jython Shell.</p>
352     * 
353     * <p>Some notes about {@code params}:
354     * <ul>
355     *     <li>Understands {@code FlatField} and {@code String} objects.</li>
356     *     <li>Strings must be found within {@link DescribeParams}.</li>
357     *     <li>Strings control descriptions returned for all fields in 
358     *     {@code params}.</li>
359     *     <li>{@code FlatField} and {@code String} objects may appear in any order.</li>
360     * </ul>
361     * </p>
362     * 
363     * @param params See description of this method. If {@code null} or empty,
364     *               nothing will happen.
365     * 
366     * @return String descriptions of any {@code FlatField} objects in 
367     *         {@code params}, with relevant strings in {@code params} 
368     *         controlling what shows up in all descriptions.
369     * 
370     * @throws VisADException if VisAD had problems.
371     * @throws RemoteException if VisAD had problems.
372     */
373    public static String describe(Object... params)
374        throws VisADException, RemoteException {
375        String result = "";
376        if (params != null) {
377            List<FlatField> fields = new ArrayList<>(params.length);
378            List<String> descs = new ArrayList<>(params.length);
379            for (Object param : params) {
380                if (param instanceof FlatField) {
381                    fields.add((FlatField) param);
382                } else if (param instanceof String) {
383                    descs.add((String) param);
384                }
385            }
386            
387            // 350 is typical size of a single, "complete" describe call with
388            // one field.
389            StringBuilder buf = new StringBuilder(350 * descs.size());
390            for (FlatField field : fields) {
391                Description d = new Description(field, descs);
392                buf.append(d.makeDescription());
393            }
394            result = buf.toString();
395        }
396        return result;
397    }
398    
399    /**
400     * Creates a {@literal "binned sparkline"} of the given 
401     * {@link FlatField FlatFields}.
402     * 
403     * @param fields {@code FlatField} objects to {@literal "visualize"} with 
404     *               sparklines.
405     * 
406     * @return String sparkline for each {@code FlatField} in {@code fields}. 
407     * 
408     * @throws VisADException if VisAD had problems.
409     * @throws RemoteException if VisAD had problems.
410     * 
411     * @see <a href="https://en.wikipedia.org/wiki/Sparkline" target="_top">Sparkline Wikipedia Article</a>
412     */
413    public static String sparkline(FlatField... fields)
414        throws VisADException, RemoteException
415    {
416        // assuming sparkline is only using 20 bins
417        StringBuilder sb = new StringBuilder(25 * fields.length);
418        for (FlatField field : fields) {
419            Statistics s = new Statistics(field);
420            sb.append(sparkline(field, s)).append('\n');
421        }
422        return sb.toString();
423    }
424    
425    public static Long[] histogram(FlatField field, int bins)
426        throws VisADException {
427        Long[] histogram = new Long[bins];
428        EmpiricalDistribution distribution = new EmpiricalDistribution(bins);
429        distribution.load(field.getValues(false)[0]);
430        int k = 0;
431        for (SummaryStatistics stats : distribution.getBinStats()) {
432            histogram[k++] = stats.getN();
433        }
434        return histogram;
435    }
436    
437    public static String sparkline(FlatField field, Statistics s)
438        throws VisADException, RemoteException 
439    {
440        Long[] values = histogram(field, 20);
441        Real sMin = (Real) s.min();
442        Real sMax = (Real) s.max();
443        Collection<Long> collection = asList(values);
444        long max = Collections.max(collection);
445        long min = Collections.min(collection);
446        float scale = (max - min) / 7f;
447        final StringBuilder buf = new StringBuilder(values.length);
448        
449        // TJJ Mar 2018 - sandwich with min/max
450        // http://mcidas.ssec.wisc.edu/inquiry-v/?inquiry=2548
451        buf.append(fmtMe((sMin).getValue()));
452        for (Long value : values) {
453            int index = Math.round((value - min) / scale);
454            buf.append(CHARS.get(index));
455        }
456        buf.append(fmtMe((sMax).getValue()));
457        
458        return buf.toString();
459    }
460    
461    private static EnumSet<DescribeParams> parseParams(List<String> ps) {
462        Set<DescribeParams> params = Collections.emptySet();
463        if (ps != null) {
464            params = new HashSet<>(ps.size());
465            for (String p : ps) {
466                params.add(DescribeParams.valueOf(p.toUpperCase()));
467            }
468        }
469        return EnumSet.copyOf(params);
470    }
471    
472    public static class Description {
473        
474        private final FlatField field;
475        
476        private final EnumSet<DescribeParams> params;
477        
478        public Description(FlatField field, List<String> params) {
479            this.field = field;
480            if ((params == null) || params.isEmpty()) {
481                this.params = EnumSet.allOf(DescribeParams.class);
482            } else {
483                this.params = parseParams(params);
484            }
485        }
486        
487        public String makeDescription()
488            throws VisADException, RemoteException {
489            StringBuilder sb = new StringBuilder(1024);
490            Statistics s = new Statistics(field);
491            double max = ((Real) s.max()).getValue();
492            double min = ((Real) s.min()).getValue();
493            double q1 = ((Real) s.percentile(25.0)).getValue();
494            double q3 = ((Real) s.percentile(75.0)).getValue();
495            double[] modes = StatUtils.mode(field.getValues(false)[0]);
496            
497            StringBuilder tmp = new StringBuilder(128);
498            for (int i = 0; i < modes.length; i++) {
499                tmp.append(fmtMe(modes[i]));
500                if ((i + 1) < modes.length) {
501                    tmp.append(", ");
502                }
503            }
504            
505            String temp;
506            char endl = '\n';
507            if (params.contains(DescribeParams.HISTOGRAM)) {
508                temp = sparkline(field, s);
509                sb.append("Histogram :  ").append(temp).append(endl);
510            }
511            if (params.contains(DescribeParams.LENGTH)) {
512                temp = String.format("%d", s.numPoints());
513                sb.append("Length    :  ").append(temp).append(endl);
514            }
515            if (params.contains(DescribeParams.MIN)) {
516                temp = fmtMe(((Real) s.min()).getValue());
517                sb.append("Min       :  ").append(temp).append(endl);
518            }
519            if (params.contains(DescribeParams.MAX)) {
520                temp = fmtMe(((Real) s.max()).getValue());
521                sb.append("Max       :  ").append(temp).append(endl);
522            }
523            if (params.contains(DescribeParams.RANGE)) {
524                temp = fmtMe(max - min);
525                sb.append("Range     :  ").append(temp).append(endl);
526            }
527            if (params.contains(DescribeParams.Q1)) {
528                sb.append("Q1        :  ").append(fmtMe(q1)).append(endl);
529            }
530            if (params.contains(DescribeParams.Q2)) {
531                temp = fmtMe(((Real) s.percentile(50.0)).getValue());
532                sb.append("Q2        :  ").append(temp).append(endl);
533            }
534            if (params.contains(DescribeParams.Q3)) {
535                sb.append("Q3        :  ").append(fmtMe(q3)).append(endl);
536            }
537            if (params.contains(DescribeParams.IQR)) {
538                temp = fmtMe(q3 - q1);
539                sb.append("IQR       :  ").append(temp).append(endl);
540            }
541            if (params.contains(DescribeParams.MEAN)) {
542                temp = fmtMe(((Real) s.mean()).getValue());
543                sb.append("Mean      :  ").append(temp).append(endl);
544            }
545            if (params.contains(DescribeParams.MODE)) {
546                sb.append("Mode      :  ").append(tmp).append(endl);
547            }
548            if (params.contains(DescribeParams.KURTOSIS)) {
549                temp = fmtMe(((Real) s.kurtosis()).getValue());
550                sb.append("Kurtosis  :  ").append(temp).append(endl);
551            }
552            if (params.contains(DescribeParams.SKEWNESS)) {
553                temp = fmtMe(((Real) s.skewness()).getValue());
554                sb.append("Skewness  :  ").append(temp).append(endl);
555            }
556            if (params.contains(DescribeParams.STDDEV)) {
557                temp = fmtMe(((Real) s.standardDeviation()).getValue());
558                sb.append("Std Dev   :  ").append(temp).append(endl);
559            }
560            if (params.contains(DescribeParams.VARIANCE)) {
561                temp = fmtMe(((Real) s.variance()).getValue());
562                sb.append("Variance  :  ").append(temp).append(endl);
563            }
564            if (params.contains(DescribeParams.GOODPTS)) {
565                temp = String.format("%d", s.getNumGoodPoints()[0]);
566                sb.append("# Good Pts:  ").append(temp).append(endl);
567            }
568            return sb.toString();
569        }
570    }
571}