001/*
002 * This file is part of McIDAS-V
003 *
004 * Copyright 2007-2023
005 * Space Science and Engineering Center (SSEC)
006 * University of Wisconsin - Madison
007 * 1225 W. Dayton Street, Madison, WI 53706, USA
008 * https://www.ssec.wisc.edu/mcidas
009 * 
010 * All Rights Reserved
011 * 
012 * McIDAS-V is built on Unidata's IDV and SSEC's VisAD libraries, and
013 * some McIDAS-V source code is based on IDV and VisAD source code.  
014 * 
015 * McIDAS-V is free software; you can redistribute it and/or modify
016 * it under the terms of the GNU Lesser Public License as published by
017 * the Free Software Foundation; either version 3 of the License, or
018 * (at your option) any later version.
019 * 
020 * McIDAS-V is distributed in the hope that it will be useful,
021 * but WITHOUT ANY WARRANTY; without even the implied warranty of
022 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
023 * GNU Lesser Public License for more details.
024 * 
025 * You should have received a copy of the GNU Lesser Public License
026 * along with this program.  If not, see http://www.gnu.org/licenses.
027 */
028package edu.wisc.ssec.mcidasv.util.trie;
029
030import edu.wisc.ssec.mcidasv.util.trie.PatriciaTrie.KeyAnalyzer;
031
032/**
033 * Analyzes {@code CharSequence} keys with case sensitivity. With
034 * {@code CharSequenceKeyAnalyzer} you can
035 * compare, check prefix, and determine the index of a bit.
036 * <p>
037 * A typical use case for a {@code CharSequenceKeyAnalyzer} is with a
038 * {@link PatriciaTrie}.
039 * <pre>
040    PatriciaTrie&lt;String, String&gt; trie = new PatriciaTrie&lt;String, String&gt;(new CharSequenceKeyAnalyzer());
041    
042    trie.put("Lime", "Lime");
043    trie.put("LimeWire", "LimeWire");
044    trie.put("LimeRadio", "LimeRadio");
045    trie.put("Lax", "Lax");
046    trie.put("Lake", "Lake");
047    trie.put("Lovely", "Lovely");
048
049    System.out.println(trie.select("Lo"));
050    System.out.println(trie.select("Lime"));
051
052    System.out.println(trie.getPrefixedBy("La").toString());            
053
054    Output:
055        Lovely
056        Lime
057        {Lake=Lake, Lax=Lax}
058
059 * </pre>
060 * 
061 * @author Sam Berlin
062 * @author Roger Kapsi
063 */
064public class CharSequenceKeyAnalyzer implements KeyAnalyzer<CharSequence> {
065    
066    private static final long serialVersionUID = -7032449491269434877L;
067    
068    private static final int[] BITS = createIntBitMask(16);
069    
070    public static final int[] createIntBitMask(int bitCount) {
071        int[] bits = new int[bitCount];
072        for(int i = 0; i < bitCount; i++) {
073            bits[i] = 1 << (bitCount - i - 1);
074        }
075        return bits;
076    } 
077    public int length(CharSequence key) {
078        return (key != null ? key.length() * 16 : 0);
079    }
080    
081    public int bitIndex(CharSequence key,   int keyOff, int keyLength,
082                        CharSequence found, int foundOff, int foundKeyLength) {
083        boolean allNull = true;
084        
085        if(keyOff % 16 != 0 || foundOff % 16 != 0 ||
086           keyLength % 16 != 0 || foundKeyLength % 16 != 0)
087            throw new IllegalArgumentException("offsets & lengths must be at character boundaries");
088        
089        int off1 = keyOff / 16;
090        int off2 = foundOff / 16;
091        int len1 = keyLength / 16 + off1;
092        int len2 = foundKeyLength / 16 + off2;
093        int length = Math.max(len1, len2);
094        
095        // Look at each character, and if they're different
096        // then figure out which bit makes the difference
097        // and return it.
098        char k = 0, f = 0;
099        for(int i = 0; i < length; i++) {
100            int kOff = i + off1;
101            int fOff = i + off2;
102            
103            if(kOff >= len1)
104                k = 0;
105            else
106                k = key.charAt(kOff);
107            
108            if(found == null || fOff >= len2)
109                f = 0;
110            else
111                f = found.charAt(fOff);
112            
113            if(k != f) {
114               int x = k ^ f;
115               return i * 16 + (Integer.numberOfLeadingZeros(x) - 16);
116            }
117            
118            if(k != 0)
119                allNull = false;
120            
121        }
122        
123        if (allNull) {
124            return KeyAnalyzer.NULL_BIT_KEY;
125        }
126        
127        return KeyAnalyzer.EQUAL_BIT_KEY;
128    }
129    
130    public boolean isBitSet(CharSequence key, int keyLength, int bitIndex) {
131        if (key == null || bitIndex >= keyLength) {
132            return false;
133        }
134        
135        int index = bitIndex / BITS.length;
136        int bit = bitIndex - index * BITS.length;
137        return (key.charAt(index) & BITS[bit]) != 0;
138    }
139
140    public int compare(CharSequence o1, CharSequence o2) {
141        return o1.toString().compareTo(o2.toString());
142    }
143
144    public int bitsPerElement() {
145        return 16;
146    }
147
148    public boolean isPrefix(CharSequence prefix, int offset, int length, CharSequence key) {
149        if(offset % 16 != 0 || length % 16 != 0)
150            throw new IllegalArgumentException("Cannot determine prefix outside of character boundaries");
151        String s1 = prefix.subSequence(offset / 16, length / 16).toString();
152        String s2 = key.toString();
153        return s2.startsWith(s1);
154    }
155}
156