001/*
002 * $Id: CharSequenceKeyAnalyzer.java,v 1.2 2011/03/24 16:06:35 davep Exp $
003 *
004 * This file is part of McIDAS-V
005 *
006 * Copyright 2007-2011
007 * Space Science and Engineering Center (SSEC)
008 * University of Wisconsin - Madison
009 * 1225 W. Dayton Street, Madison, WI 53706, USA
010 * https://www.ssec.wisc.edu/mcidas
011 * 
012 * All Rights Reserved
013 * 
014 * McIDAS-V is built on Unidata's IDV and SSEC's VisAD libraries, and
015 * some McIDAS-V source code is based on IDV and VisAD source code.  
016 * 
017 * McIDAS-V is free software; you can redistribute it and/or modify
018 * it under the terms of the GNU Lesser Public License as published by
019 * the Free Software Foundation; either version 3 of the License, or
020 * (at your option) any later version.
021 * 
022 * McIDAS-V is distributed in the hope that it will be useful,
023 * but WITHOUT ANY WARRANTY; without even the implied warranty of
024 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
025 * GNU Lesser Public License for more details.
026 * 
027 * You should have received a copy of the GNU Lesser Public License
028 * along with this program.  If not, see http://www.gnu.org/licenses.
029 */
030package edu.wisc.ssec.mcidasv.util.trie;
031
032import edu.wisc.ssec.mcidasv.util.trie.PatriciaTrie.KeyAnalyzer;
033
034/**
035 * Analyzes <code>CharSequence</code> keys with case sensitivity. With 
036 * <code>CharSequenceKeyAnalyzer</code> you can
037 * compare, check prefix, and determine the index of a bit.
038 * <p>
039 * A typical use case for a <code>CharSequenceKeyAnalyzer</code> is with a 
040 * {@link PatriciaTrie}.
041 * <pre>
042    PatriciaTrie&lt;String, String&gt; trie = new PatriciaTrie&lt;String, String&gt;(new CharSequenceKeyAnalyzer());
043    
044    trie.put("Lime", "Lime");
045    trie.put("LimeWire", "LimeWire");
046    trie.put("LimeRadio", "LimeRadio");
047    trie.put("Lax", "Lax");
048    trie.put("Lake", "Lake");
049    trie.put("Lovely", "Lovely");
050
051    System.out.println(trie.select("Lo"));
052    System.out.println(trie.select("Lime"));
053
054    System.out.println(trie.getPrefixedBy("La").toString());            
055
056    Output:
057        Lovely
058        Lime
059        {Lake=Lake, Lax=Lax}
060
061 * </pre>
062 * 
063 * @author Sam Berlin
064 * @author Roger Kapsi
065 */
066public class CharSequenceKeyAnalyzer implements KeyAnalyzer<CharSequence> {
067    
068    private static final long serialVersionUID = -7032449491269434877L;
069    
070    private static final int[] BITS = createIntBitMask(16);
071    
072    public static final int[] createIntBitMask(int bitCount) {
073        int[] bits = new int[bitCount];
074        for(int i = 0; i < bitCount; i++) {
075            bits[i] = 1 << (bitCount - i - 1);
076        }
077        return bits;
078    } 
079    public int length(CharSequence key) {
080        return (key != null ? key.length() * 16 : 0);
081    }
082    
083    public int bitIndex(CharSequence key,   int keyOff, int keyLength,
084                        CharSequence found, int foundOff, int foundKeyLength) {
085        boolean allNull = true;
086        
087        if(keyOff % 16 != 0 || foundOff % 16 != 0 ||
088           keyLength % 16 != 0 || foundKeyLength % 16 != 0)
089            throw new IllegalArgumentException("offsets & lengths must be at character boundaries");
090        
091        int off1 = keyOff / 16;
092        int off2 = foundOff / 16;
093        int len1 = keyLength / 16 + off1;
094        int len2 = foundKeyLength / 16 + off2;
095        int length = Math.max(len1, len2);
096        
097        // Look at each character, and if they're different
098        // then figure out which bit makes the difference
099        // and return it.
100        char k = 0, f = 0;
101        for(int i = 0; i < length; i++) {
102            int kOff = i + off1;
103            int fOff = i + off2;
104            
105            if(kOff >= len1)
106                k = 0;
107            else
108                k = key.charAt(kOff);
109            
110            if(found == null || fOff >= len2)
111                f = 0;
112            else
113                f = found.charAt(fOff);
114            
115            if(k != f) {
116               int x = k ^ f;
117               return i * 16 + (Integer.numberOfLeadingZeros(x) - 16);
118            }
119            
120            if(k != 0)
121                allNull = false;
122            
123        }
124        
125        if (allNull) {
126            return KeyAnalyzer.NULL_BIT_KEY;
127        }
128        
129        return KeyAnalyzer.EQUAL_BIT_KEY;
130    }
131    
132    public boolean isBitSet(CharSequence key, int keyLength, int bitIndex) {
133        if (key == null || bitIndex >= keyLength) {
134            return false;
135        }
136        
137        int index = bitIndex / BITS.length;
138        int bit = bitIndex - index * BITS.length;
139        return (key.charAt(index) & BITS[bit]) != 0;
140    }
141
142    public int compare(CharSequence o1, CharSequence o2) {
143        return o1.toString().compareTo(o2.toString());
144    }
145
146    public int bitsPerElement() {
147        return 16;
148    }
149
150    public boolean isPrefix(CharSequence prefix, int offset, int length, CharSequence key) {
151        if(offset % 16 != 0 || length % 16 != 0)
152            throw new IllegalArgumentException("Cannot determine prefix outside of character boundaries");
153        String s1 = prefix.subSequence(offset / 16, length / 16).toString();
154        String s2 = key.toString();
155        return s2.startsWith(s1);
156    }
157}
158