001/*
002 * This file is part of McIDAS-V
003 *
004 * Copyright 2007-2024
005 * Space Science and Engineering Center (SSEC)
006 * University of Wisconsin - Madison
007 * 1225 W. Dayton Street, Madison, WI 53706, USA
008 * https://www.ssec.wisc.edu/mcidas/
009 * 
010 * All Rights Reserved
011 * 
012 * McIDAS-V is built on Unidata's IDV and SSEC's VisAD libraries, and
013 * some McIDAS-V source code is based on IDV and VisAD source code.  
014 * 
015 * McIDAS-V is free software; you can redistribute it and/or modify
016 * it under the terms of the GNU Lesser Public License as published by
017 * the Free Software Foundation; either version 3 of the License, or
018 * (at your option) any later version.
019 * 
020 * McIDAS-V is distributed in the hope that it will be useful,
021 * but WITHOUT ANY WARRANTY; without even the implied warranty of
022 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
023 * GNU Lesser Public License for more details.
024 * 
025 * You should have received a copy of the GNU Lesser Public License
026 * along with this program.  If not, see https://www.gnu.org/licenses/.
027 */
028
029package edu.wisc.ssec.mcidasv.util;
030
031import org.mozilla.universalchardet.UniversalDetector;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035import java.io.FileInputStream;
036import java.io.IOException;
037import java.io.InputStream;
038
039/**
040 * Based on the juniversalchardet example code.
041 *
042 * This code is primarily used by the {@literal "editFile"} function in 
043 * {@code interactive.py}.
044 */
045public final class DetectCharset {
046
047    private static final Logger logger = 
048        LoggerFactory.getLogger(DetectCharset.class);
049
050    private DetectCharset() { }
051
052    public static String detect(String file) throws IOException {
053        try (InputStream fis = new FileInputStream(file)) {
054            UniversalDetector detector = new UniversalDetector(null);
055            int nread;
056            byte[] buf = new byte[4096];
057            while (((nread = fis.read(buf)) > 0) && !detector.isDone()) {
058                detector.handleData(buf, 0, nread);
059            }
060
061            detector.dataEnd();
062
063            String encoding = detector.getDetectedCharset();
064            if (encoding != null) {
065                logger.trace("detected encoding '{}'", encoding);
066            } else {
067                logger.trace("no encoding detected!");
068            }
069
070            detector.reset();
071            return encoding;
072        }
073    }
074}