/*
 * This file is part of McIDAS-V
 *
 * Copyright 2007-2025
 * Space Science and Engineering Center (SSEC)
 * University of Wisconsin - Madison
 * 1225 W. Dayton Street, Madison, WI 53706, USA
 * https://www.ssec.wisc.edu/mcidas/
 *
 * All Rights Reserved
 *
 * McIDAS-V is built on Unidata's IDV and SSEC's VisAD libraries, and
 * some McIDAS-V source code is based on IDV and VisAD source code.
 *
 * McIDAS-V is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * McIDAS-V is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with this program. If not, see https://www.gnu.org/licenses/.
 */

package edu.wisc.ssec.mcidasv.util;

import org.mozilla.universalchardet.UniversalDetector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * Static helper for guessing the character encoding of a file.
 *
 * <p>Based on the juniversalchardet example code.</p>
 *
 * <p>This code is primarily used by the {@literal "editFile"} function in
 * {@code interactive.py}.</p>
 */
public final class DetectCharset {

    private static final Logger logger =
        LoggerFactory.getLogger(DetectCharset.class);

    /** Not instantiable; this class only exposes a static method. */
    private DetectCharset() { }

    /**
     * Attempts to determine the character encoding of the given file.
     *
     * <p>The file is read in 4096-byte chunks that are handed to a
     * {@link UniversalDetector}. Reading stops when a read returns no
     * bytes or when the detector reports that it is done. The outcome is
     * logged at trace level.</p>
     *
     * @param file Path of the file to examine.
     *
     * @return Value of {@link UniversalDetector#getDetectedCharset()} for
     * the file's contents, or {@code null} if the detector did not settle
     * on an encoding.
     *
     * @throws IOException if the file could not be opened or read.
     */
    public static String detect(String file) throws IOException {
        try (InputStream input = new FileInputStream(file)) {
            final UniversalDetector charsetDetector =
                new UniversalDetector(null);
            final byte[] chunk = new byte[4096];

            while (true) {
                final int count = input.read(chunk);
                // Stop when the read yields nothing; only then consult the
                // detector, so the evaluation order matches a
                // "read, then isDone" loop condition.
                if ((count <= 0) || charsetDetector.isDone()) {
                    break;
                }
                charsetDetector.handleData(chunk, 0, count);
            }

            // Signal that no more data will be supplied before asking for
            // the result.
            charsetDetector.dataEnd();

            final String detected = charsetDetector.getDetectedCharset();
            if (detected == null) {
                logger.trace("no encoding detected!");
            } else {
                logger.trace("detected encoding '{}'", detected);
            }

            charsetDetector.reset();
            return detected;
        }
    }
}