title: MySQL · mysql · mysql 查询结果异常分析
author: 张远
现象
查询条件类型变化后,查询出了不正确的结果。
create table t1(id int primary key, a varchar(50) DEFAULT NULL, key idx_a(a)) engine=innodb;
show create table t1;
insert into t1 values(1,'6036000240201612190005565273');
insert into t1 values(2,'6036000240201611150005564192');
select * from t1 where a='6036000240201612190005565273';
+----+------------------------------+
| id | a |
+----+------------------------------+
| 1 | 6036000240201612190005565273 |
+----+------------------------------+
//多了一行不一致的数据
select * from t1 where a=6036000240201612190005565273;
+----+------------------------------+
| id | a |
+----+------------------------------+
| 2 | 6036000240201611150005564192 |
| 1 | 6036000240201612190005565273 |
+----+------------------------------+
分析
索引问题
首先我们要确定数据是否存在问题,我们注意到字段a上有索引idx_a,而且两个查询都走了此索引。
explain select * from t1 where a='6036000240201612190005565273';
+----+-------------+-------+------+---------------+-------+---------+-------+------+--------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+------+---------------+-------+---------+-------+------+--------------------------+
| 1 | SIMPLE | t1 | ref | idx_a | idx_a | 153 | const | 1 | Using where; Using index |
+----+-------------+-------+------+---------------+-------+---------+-------+------+--------------------------+
explain select * from t1 where a=6036000240201612190005565273;
+----+-------------+-------+-------+---------------+-------+---------+------+------+--------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+-------+---------------+-------+---------+------+------+--------------------------+
| 1 | SIMPLE | t1 | index | idx_a | idx_a | 153 | NULL | 2 | Using where; Using index |
+----+-------------+-------+-------+---------------+-------+---------+------+------+--------------------------+
一种可能的情况,由于BUG导致二级索引与主键不一致,此种情况我们可通过重建索引修复。
于是删除索引idx_a,再来通过主键索引查询看看
alter table t1 drop key idx_a;
explain select * from t1 where a=6036000240201612190005565273;
+----+-------------+-------+------+---------------+------+---------+------+------+-------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+------+---------------+------+---------+------+------+-------------+
| 1 | SIMPLE | t1 | ALL | NULL | NULL | NULL | NULL | 2 | Using where |
+----+-------------+-------+------+---------------+------+---------+------+------+-------------+
explain select * from t1 where a='6036000240201612190005565273';
+----+-------------+-------+------+---------------+------+---------+------+------+-------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+------+---------------+------+---------+------+------+-------------+
| 1 | SIMPLE | t1 | ALL | NULL | NULL | NULL | NULL | 2 | Using where |
+----+-------------+-------+------+---------------+------+---------+------+------+-------------+
1 row in set (0.00 sec)
select * from t1 where a=6036000240201612190005565273;
+----+------------------------------+
| id | a |
+----+------------------------------+
| 1 | 6036000240201612190005565273 |
| 2 | 6036000240201611150005564192 |
+----+------------------------------+
2 rows in set (0.00 sec)
select * from t1 where a='6036000240201612190005565273';
+----+------------------------------+
| id | a |
+----+------------------------------+
| 1 | 6036000240201612190005565273 |
+----+------------------------------+
1 row in set (0.00 sec)
然而,结果与删除索引前一致。排除了索引的问题,我们只能从源码中来寻找答案了。
查源码
问题出在where条件上,我可以把断点放在条件检查的总入口evaluate_join_record这里,然后一步步跟进下去。
- 先看条件 a=6036000240201612190005565273
根据比较表达式参数的类型来决定比较时内部使用的比较函数,a:STRING_RESUL b: DECIMAL_RESULT最后得到按REAL_RESULT类型进行比较
Item_result item_cmp_type(Item_result a,Item_result b)
{
if (a == STRING_RESULT && b == STRING_RESULT)
return STRING_RESULT;
if (a == INT_RESULT && b == INT_RESULT)
return INT_RESULT;
else if (a == ROW_RESULT || b == ROW_RESULT)
return ROW_RESULT;
if ((a == INT_RESULT || a == DECIMAL_RESULT) &&
(b == INT_RESULT || b == DECIMAL_RESULT))
return DECIMAL_RESULT;
return REAL_RESULT;
}
这里REAL_RESULT类型比较对应的比较函数为Arg_comparator::compare_real
int Arg_comparator::compare_real()
{
/*
Fix yet another manifestation of Bug#2338. 'Volatile' will instruct
gcc to flush double values out of 80-bit Intel FPU registers before
performing the comparison.
*/
volatile double val1, val2;
val1= (*a)->val_real();
if (!(*a)->null_value)
{
val2= (*b)->val_real();
if (!(*b)->null_value)
{
if (set_null)
owner->null_value= 0;
if (val1 < val2) return -1;
if (val1 == val2) return 0;
return 1;
}
}
if (set_null)
owner->null_value= 1;
return -1;
}
compare_real 会把a值转化为double类型再比较((*a)->val_real()),最终得到的转化函数为my_strtod
由于精度问题最后字符串'6036000240201612190005565273'会转化为6.0360002402016117e+27,会损失精度。
同时对于比较表达式的右值数字6036000240201612190005565273在内部表示为Item_decimal,在compare_real时也会通过(*b)->val_real(),调用Item_decimal::val_real,最终也是调用my_strtod,转化后的值也为6.0360002402016117e+27
而对于表中另外一个值'6036000240201611150005564192'通过上述转化也6.0360002402016117e+27
因此对于条件 a=6036000240201612190005565273最后返回了两行。
- 再看条件 a='6036000240201612190005565273'
这个两边都是Field_varstring类型,最终使用的比较函数是Arg_comparator::compare_string。此函数比较时字符串精度不会丢失,比较操作是精确的,因此最终只返回了一行。
结论
最终问题的原因是比较时做类型转化时丢失了精度,导致比较出错。对于字符串转double的情况下,只保留了16位小数。
可以做个实验
insert into t1 values(3,'6036000240201611');
insert into t1 values(4,'60360002402016111');
select * from t1 where a=60360002402016111;
+----+-------------------+
| id | a |
+----+-------------------+
| 4 | 60360002402016111 |
+----+-------------------+
elect * from t1 where a=6036000240201611;
+----+------------------+
| id | a |
+----+------------------+
| 3 | 6036000240201611 |
+----+------------------+
1 row in set (0.01 sec)
//小数位16位,出现异常
select * from t1 where a=60360002402016112;
+----+-------------------+
| id | a |
+----+-------------------+
| 4 | 60360002402016111 |
+----+-------------------+
1 row in set (0.01 sec)
//小数位15位,没有问题
mysql> select * from t1 where a=6036000240201612;
Empty set (0.00 sec)
实际上mysql 对于float,double小数的处理是不精确的,使用时应格外注意。官方也有很有意思的例子,有兴趣的可以看看。