【JDK】：java.lang.Integer源码解析

2022-09-23 18:32:34

本文对JDK8中的java.lang.Integer包装类的部分数值缓存技术、valueOf()、stringSize()、toString()、getChars()、parseInt()等进行简要分析。

Integer缓存

先来看一段代码：

// Obtain the value 13 twice through Integer.valueOf().
Integer a1 = Integer.valueOf(13);

Integer a2 = Integer.valueOf(13);

// Obtain the value 133 twice through Integer.valueOf().
Integer a3 = Integer.valueOf(133);

Integer a4 = Integer.valueOf(133);

// == compares object references here, not int values.
System.out.println(a1 == a2);   // prints true

System.out.println(a3 == a4);   // prints false

两个输出语句的结果不同，原因在于Integer使用了一个静态内部类（嵌套类）IntegerCache，里面包含了一个缓存数组cache[]。默认情况下，[-128, 127]之间的整数会在IntegerCache第一次被使用（类初始化）时创建为Integer对象，放在cache[]数组里。区间的上限值high可以通过设置JVM参数-XX:AutoBoxCacheMax来改变，默认为127（即byte类型的上限），该参数值保存在java.lang.Integer.IntegerCache.high属性中。

    // Static nested class that holds the cache of Integer objects for the
    // range [low, high]; with the default settings that range is [-128, 127].

    private static class IntegerCache {

        // Lower bound of the cached range (fixed).
        static final int low = -128;

        // Upper bound of the cached range; assigned once in the static initializer.
        static final int high;

        // cache[k] holds the Integer whose value is low + k.
        static final Integer cache[];

        static {

            // high defaults to 127 and can be raised through the saved property
            // "java.lang.Integer.IntegerCache.high" read below.

            int h = 127;

            String integerCacheHighPropValue =

                sun.misc.VM.getSavedProperty("java.lang.Integer.IntegerCache.high");

            if (integerCacheHighPropValue != null) {

                try {

                    int i = parseInt(integerCacheHighPropValue);

                    // Never let the upper bound drop below 127.
                    i = Math.max(i, 127);

                    // Cap the upper bound so that the array length
                    // (high - low + 1) does not exceed Integer.MAX_VALUE.

                    h = Math.min(i, Integer.MAX_VALUE - (-low) -1);

                } catch( NumberFormatException nfe) {

                    // If the property cannot be parsed into an int, ignore it.

                }

            }

            high = h;

            cache = new Integer[(high - low) + 1];

            // Fill the array with Integer objects for low, low + 1, ..., high.
            int j = low;

            for(int k = 0; k < cache.length; k++)

                cache[k] = new Integer(j++);

            // range [-128, 127] must be interned (JLS7 5.1.7)

            assert IntegerCache.high >= 127;

        }

        // Private constructor: the class is used only through its static members.
        private IntegerCache() {}

    }

而使用Integer.valueOf()进行构造时，就使用了cache[]缓存数组。因此使用该方法构造的Integer对象如果在缓存区间内，会直接返回cache[]数组内的相应的引用，自然就是同一个对象；否则将生成一个全新的Integer对象。与此对应的，如果使用构造函数Integer()直接构造，根本没有使用到缓存数组，生成的一定是全新的Integer对象。因此使用Integer.valueOf()构造能够节省资源，提高效率。

    // Construction through the cache[] array.
    // Returns the cached Integer when i lies in
    // [IntegerCache.low, IntegerCache.high]; otherwise creates a new Integer.

    public static Integer valueOf(int i) {

        if (i >= IntegerCache.low && i <= IntegerCache.high)

            // Adding -low turns the value into a zero-based array index.
            return IntegerCache.cache[i + (-IntegerCache.low)];

        return new Integer(i);

    }

    // Construction through the constructor: only stores the given value in the
    // new object; the cache[] array is not consulted.

    public Integer(int value) {

        this.value = value;

    }

stringSize()

这个函数不是public权限的函数，只作为内部工具方法使用。这个方法的实现很巧妙：避免了除法、求余等运算，判断条件简单，效率高（采用静态的查找表sizeTable，而不是复杂的逻辑判断，可以明显提高效率）。（int的十进制表示最多只有10位，因此查找表只需要10个元素。）

    // Lookup table for stringSize(): entry k is the largest value consisting of
    // k + 1 nines (9, 99, ...), except the last entry, which is Integer.MAX_VALUE.
    final static int [] sizeTable = { 9, 99, 999, 9999, 99999, 999999, 9999999,

                                      99999999, 999999999, Integer.MAX_VALUE };

    // Requires positive x.
    // Returns i + 1 for the first index i with x <= sizeTable[i], i.e. the
    // number of decimal digits of x. The loop has no explicit bound; it relies
    // on the last table entry being Integer.MAX_VALUE.

    static int stringSize(int x) {

        for (int i=0; ; i++)

            if (x <= sizeTable[i])

                return i+1;

    }

toString(int i , int radix)

一个整数在给定进制的字符串表示。

    // Returns the string representation of i in the given radix.
    // A radix outside [Character.MIN_RADIX, Character.MAX_RADIX] is replaced by 10.
    public static String toString(int i, int radix) {

        if (radix < Character.MIN_RADIX || radix > Character.MAX_RADIX)

            radix = 10;

        /* For radix 10, use the faster conversion */

        if (radix == 10) {

            return toString(i);

        }

        char buf[] = new char[33];

        boolean negative = (i < 0);

        // Start at the last slot of the 33-char buffer and fill backwards.
        int charPos = 32;

        // Work on a non-positive value: a non-negative input is negated.

        if (!negative) {

            i = -i;

        }

        // Concise loop: radix is the base, between Character.MIN_RADIX and
        // Character.MAX_RADIX at this point. Each pass emits one digit.

        while (i <= -radix) {

            buf[charPos--] = digits[-(i % radix)];

            i = i / radix;

        }

        // Most significant digit.
        buf[charPos] = digits[-i];

        // Sign character for negative input.

        if (negative) {

            buf[--charPos] = '-';

        }

        return new String(buf, charPos, (33 - charPos));

    }

上面的代码使用了一个final static 的字符数组digits[]，直接根据i与进制radix的求余结果从digits[]里面取值，提高运算效率。

    /**
     * All possible chars for representing a number as a String.
     * The table has 36 entries ('0'-'9' followed by 'a'-'z').
     */

    final static char[] digits = {

        '0' , '1' , '2' , '3' , '4' , '5' ,

        '6' , '7' , '8' , '9' , 'a' , 'b' ,

        'c' , 'd' , 'e' , 'f' , 'g' , 'h' ,

        'i' , 'j' , 'k' , 'l' , 'm' , 'n' ,

        'o' , 'p' , 'q' , 'r' , 's' , 't' ,

        'u' , 'v' , 'w' , 'x' , 'y' , 'z'

    };

toString()

toString()方法返回当前Integer对象的字符串表示。可能有人觉得上面的toString(int i, int radix)已经是通用算法了，但是JDK在radix为10的情况下并没有使用这个通用算法，而是采用了效率更高的方法。

    // Returns the string form of this object's value by delegating to the
    // static toString(int).
    public String toString() {

        return toString(value);

    }

    // The method that toString() delegates to.
    // Integer.MIN_VALUE must be handled first: both stringSize(-i) below and
    // getChars() (via i = -i) negate a negative input, and negating
    // Integer.MIN_VALUE overflows.

    public static String toString(int i) {

        if (i == Integer.MIN_VALUE)

            return "-2147483648";

        // Length of the string representation, including the '-' sign for
        // negative numbers.

        int size = (i < 0) ? stringSize(-i) + 1 : stringSize(i);

        // Write the characters of i into the char[] array.

        char[] buf = new char[size];

        getChars(i, size, buf);

        // NOTE(review): presumably a non-public String constructor that takes
        // the array without copying; it is not defined here, so confirm in String.
        return new String(buf, true);

    }

这个算法的核心是getChars的实现，即将一个整数高效地逐位存入一个char数组中。

    // Core routine: writes the decimal characters of i into buf from back to
    // front, starting just before position index. Negating i overflows when
    // i == Integer.MIN_VALUE.
    // fail if i == Integer.MIN_VALUE

    static void getChars(int i, int index, char[] buf) {

        int q, r;

        int charPos = index;

        char sign = 0;

        // Remember the sign and continue with the absolute value.
        if (i < 0) {

            sign = '-';

            i = -i;

        }

        // Handle large values (i >= 65536, i.e. 2^16).
        // Generate two digits per iteration

        while (i >= 65536) {

            q = i / 100;

            // really: r = i - (q * 100);

            r = i - ((q << 6) + (q << 5) + (q << 2));

            i = q;

            buf [--charPos] = DigitOnes[r];  // ones digit of r

            buf [--charPos] = DigitTens[r];  // tens digit of r

        }

        // Handle values below 2^16.
        // Fall thru to fast mode for smaller numbers

        for (;;) {

            q = (i * 52429) >>> (16+3);  // has the effect of q = i / 10

            r = i - ((q << 3) + (q << 1));  // r = i-(q*10) ...

            buf [--charPos] = digits [r];

            i = q;

            if (i == 0) break;

        }

        // Prepend the sign if the input was negative.

        if (sign != 0) {

            buf [--charPos] = sign;

        }

    }

    // Tens-digit table: for an index r in 0..99, DigitTens[r] is the character
    // of the tens digit of r (each character is repeated ten times in a row).

    final static char [] DigitTens = {

        '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',

        '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',

        '2', '2', '2', '2', '2', '2', '2', '2', '2', '2',

        '3', '3', '3', '3', '3', '3', '3', '3', '3', '3',

        '4', '4', '4', '4', '4', '4', '4', '4', '4', '4',

        '5', '5', '5', '5', '5', '5', '5', '5', '5', '5',

        '6', '6', '6', '6', '6', '6', '6', '6', '6', '6',

        '7', '7', '7', '7', '7', '7', '7', '7', '7', '7',

        '8', '8', '8', '8', '8', '8', '8', '8', '8', '8',

        '9', '9', '9', '9', '9', '9', '9', '9', '9', '9',

        } ;

    // Ones-digit table: for an index r in 0..99, DigitOnes[r] is the character
    // of the ones digit of r (the sequence '0'..'9' is repeated ten times).

    final static char [] DigitOnes = {

        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',

        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',

        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',

        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',

        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',

        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',

        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',

        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',

        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',

        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',

        } ;

getChars()分别对int型的高位的两个字节、低位的两个字节进行遍历。while部分的思想是，DigitOnes是代表个位，DigitTens代表十位，每次r可以迭代两位（r就是除以100的余数），每次找出两位数，这样有效的减少了乘除法的次数。至于移位运算，是为了提高运算速度，q*100 = q*(2^6) +q*(2^5) + q*(2^2) = 64q+32q+4q.

for循环部分，q得到i截断个位的值(q = i / 10 )。采用上述复杂的移位运算的目的是提高速度(>>>是无符号右移)：$q = i \times 52429 / 2^{16} / 2^{3} \approx i \times 0.1$。其中 $52429 / 2^{16} \approx 0.8$，即 i*52429>>>16 相当于较精确地乘以十分之八，再右移3位（除以8）就得到约十分之一。但是i较大（用到高位的两个字节）时，这个乘法会溢出，所以源码里对高位与低位用两种方式分开循环。

parseInt()

将String转为int，相关的编程题参见剑指offer（56）：表示数值的字符串

    // Parses s as a decimal integer by delegating to parseInt(s, 10).
    public static int parseInt(String s) throws NumberFormatException {

        return parseInt(s,10);

    }

    // Parses s as a signed integer in the given radix.
    // Throws NumberFormatException when s is null or empty, when radix is
    // outside [Character.MIN_RADIX, Character.MAX_RADIX], when s is a lone
    // "+" or "-", when a character is not a digit in that radix, or when the
    // value does not fit the limit checked below.
    public static int parseInt(String s, int radix)

                throws NumberFormatException

    {

        /*
         * WARNING: This method may be invoked early during VM initialization
         * before IntegerCache is initialized. Care must be taken to not use
         * the valueOf method.
         */

        if (s == null) {

            throw new NumberFormatException("null");

        }

        if (radix < Character.MIN_RADIX) {

            throw new NumberFormatException("radix " + radix +

                                            " less than Character.MIN_RADIX");

        }

        if (radix > Character.MAX_RADIX) {

            throw new NumberFormatException("radix " + radix +

                                            " greater than Character.MAX_RADIX");

        }

        // result is accumulated as a non-positive number (see the loop below).
        int result = 0;

        boolean negative = false;

        int i = 0, len = s.length();

        // Most negative value result may reach; this is the bound for input
        // without a '-' sign and is widened to Integer.MIN_VALUE for '-' input.
        int limit = -Integer.MAX_VALUE;

        int multmin;

        int digit;

        if (len > 0) {

            char firstChar = s.charAt(0);

            if (firstChar < '0') { // Possible leading "+" or "-"

                if (firstChar == '-') {

                    negative = true;

                    limit = Integer.MIN_VALUE;

                } else if (firstChar != '+')

                    throw NumberFormatException.forInputString(s);

                if (len == 1) // Cannot have lone "+" or "-"

                    throw NumberFormatException.forInputString(s);

                i++;

            }

            // A result below multmin would pass limit once multiplied by radix.
            multmin = limit / radix;

            while (i < len) {

                // Accumulating negatively avoids surprises near MAX_VALUE

                digit = Character.digit(s.charAt(i++),radix);

                if (digit < 0) {

                    throw NumberFormatException.forInputString(s);

                }

                // Overflow check before the multiplication.
                if (result < multmin) {

                    throw NumberFormatException.forInputString(s);

                }

                result *= radix;

                // Overflow check before the subtraction: result - digit must
                // not go below limit.
                if (result < limit + digit) {

                    throw NumberFormatException.forInputString(s);

                }

                result -= digit;

            }

        } else {

            throw NumberFormatException.forInputString(s);

        }

        // result holds the negated magnitude; flip the sign for input without '-'.
        return negative ? result : -result;

    }

源码中注意的几点：

所有的运算都是基于负数的。在toString也提到过，因为将Integer.MIN_VALUE直接变换符号会导致数值溢出。
溢出的判断技巧。通过 multmin = limit / radix 这个数的控制，可以在乘法计算之前判断计算之后是否会溢出。同理，result < limit + digit 可以在减法之前判断计算之后是否会溢出。

转载自： https://blog.csdn.net/u011080472/article/details/51406198

码农公寓

Integer缓存

stringSize()

toString(int i , int radix)

toString()

parseInt()

相关文章