memcpy、memmove与内存重叠

memcpy、memmove与内存重叠

问题

  • 更换软件运行平台后,使用memcpy将已处理数据从接收缓冲区移出后,内存数据被破坏

  • 平台

    $ gcc -v
    Using built-in specs.
    COLLECT_GCC=gcc
    COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/4.9/lto-wrapper
    Target: x86_64-linux-gnu
    Configured with: ../src/configure -v --with-pkgversion='Linx 4.9.2-10-linx1' --with-bugurl=file:///usr/share/doc/gcc-4.9/README.Bugs --enable-languages=c,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-4.9 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.9 --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-4.9-amd64/jre --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-4.9-amd64 --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-4.9-amd64 --with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-multiarch --with-arch-32=i586 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
    Thread model: posix
    gcc version 4.9.2 (Linx 4.9.2-10-linx1) 
    
    $ uname -a
    Linux localhost 4.9.0-0.bpo.1-linx-security-amd64 #1 SMP Linx 4.9.2-2~bpo8+1linx4 (2018-08-17) x86_64 GNU/Linux
    

使用原则

  • memcpy和memmove都是常用的内存块拷贝函数
  • memcpy不提供内存重叠的检测和处理,对源、目的区域存在重叠的内存进行拷贝,其行为是未定义的(Undefined Behavior),memcpy不保证源内存重叠区域在被覆盖之前被拷贝
  • 对源目存在重叠的内存拷贝的情形,应选用memmove,它提供了对应的检测和处理
  • 不要揣测Undefined行为,它是版本相关、外部实现相关、不受控的,不应依赖Undefined行为,参考下文分析中内存重叠情形4的假设(“正序拷贝不会出问题”)

分析

源目内存的位置关系

memcpy、memmove与内存重叠

其中情形2和情形4,存在内存重叠

memcpy(glibc-2.32)

/* Copy LEN bytes from SRCPP to DSTPP and return DSTPP.

   The copy always proceeds from the lowest address upward, and this
   function never compares the two addresses with each other, so nothing
   here detects or compensates for overlapping regions.

   NOTE(review): OP_T_THRES, OPSIZ and the *_COPY_FWD macros are defined
   outside this listing; the remarks about them below are based on the
   way they are used here and should be confirmed against their
   definitions.  */
void *
MEMCPY (void *dstpp, const void *srcpp, size_t len)
{
  /* Hold both addresses as integers so that alignment arithmetic can be
     done on them.  */
  unsigned long int dstp = (long int) dstpp;
  unsigned long int srcp = (long int) srcpp;

  /* Copy from the beginning to the end.  */

  /* If there are not too few bytes to copy, use word copy.  */
  if (len >= OP_T_THRES)
    {
      /* Copy just a few bytes to make DSTP aligned.  (-dstp) % OPSIZ is
	 the distance from DSTP up to the next multiple of OPSIZ, assuming
	 OPSIZ is a power of two.  LEN is reduced before the byte copy,
	 presumably because BYTE_COPY_FWD advances DSTP and would change
	 the value of that expression -- TODO confirm.  */
      len -= (-dstp) % OPSIZ;
      BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ);

      /* Copy whole pages from SRCP to DSTP by virtual address manipulation,
	 as much as possible.  */

      PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len);

      /* Copy from SRCP to DSTP taking advantage of the known alignment of
	 DSTP.  Number of bytes remaining is put in the third argument,
	 i.e. in LEN.  This number may vary from machine to machine.  */

      WORD_COPY_FWD (dstp, srcp, len, len);

      /* Fall out and copy the tail.  */
    }

  /* There are just a few bytes to copy.  Use byte memory operations.  */
  BYTE_COPY_FWD (dstp, srcp, len);

  /* The original destination pointer is returned, not the advanced DSTP.  */
  return dstpp;
}

可以看到

  • memcpy实现为从低地址拷贝到高地址,没有是否重叠的检测,综合BYTE、PAGE、WORD等拷贝优化

  • 显然,对于情形2存在问题:源地址重叠部分在被拷贝到目的地址之前就已被覆盖

  • 对于情形4,如果从低地址到高地址、逐字节拷贝是不存在覆盖的,但WORD_COPY_FWD的存在使其行为变得不确定,不同glibc版本下会得到不同的结果:

    • Linx 6.0、RHEL <6.5、Ubuntu 20等均能得到期望的结果

    • Linx 8.0、RHEL >7等在调用memcpy处理后,内存数据被破坏,此时如果测试使用BYTE复制能得到正确的结果

    • 因此可以断定,glibc的WORD_COPY_FWD实现同样经过了“正常-更新-引入bug-消缺”的循环

    • 如果将运行在老版本glibc下的代码迁移到一个不是最新版本、而是中间版本的glibc,也容易暴雷

    • 内存及测试代码如下:

      // Selector passed to memoryCopyTest() to choose the copy routine.
      // eInvalid and eMax are exclusive bounds used for range checking,
      // not selectable routines.
      enum
      {
      	eInvalid  = 0,	// lower bound, rejected by memoryCopyTest()
      	eMemcpy   = 1,	// shift the buffer with memcpy
      	eMemmove  = 2,	// shift the buffer with memmove
      	eByteCopy = 3,	// shift the buffer with a byte-by-byte loop
      	eMax      = 4,	// upper bound, rejected by memoryCopyTest()
      };
      
      // Shifts the unprocessed tail of a receive buffer down to the start of
      // the same buffer using the routine selected by copyType, then checks
      // the result against a reference copy taken beforehand.
      //
      // copyType: one of eMemcpy, eMemmove, eByteCopy.
      // Returns true when the shifted data matches the reference copy;
      // false when copyType is out of range or the data differs.
      // Prints "Copy success!" or "memory corrupted!" to std::cout.
      bool memoryCopyTest(int copyType)
      {
      	using Juint8 = unsigned char;
      
      	// Only selectors strictly between eInvalid and eMax are accepted.
      	if (!(copyType > eInvalid && copyType < eMax))
      	{
      		return false;
      	}
      
      	// Sample input: raw packet bytes as they would arrive in the buffer.
      	Juint8 packet[] = {
      		0x68, 0x2f, 0x0c, 0x00, 0x06, 0x00, 0x74, 0x01, 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02,
      		0x04, 0x00, 0x16, 0x44, 0x50, 0x44, 0x38, 0x30, 0x30, 0x32, 0x30, 0x31, 0x39, 0x30, 0x35, 0x30,
      		0x38, 0x31, 0x36, 0x35, 0x30, 0x2e, 0x64, 0x61, 0x74, 0xf7, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x00,
      		0x00,
      		0x68, 0xe1, 0x0e, 0x00, 0x06, 0x00, 0x74, 0x01, 0x05, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02,
      		0x06, 0xf7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64,
      		0x65, 0x09, 0x28, 0x24, 0x28, 0x49, 0x43, 0x43, 0x53, 0x44, 0x45, 0x56, 0x48, 0x4f, 0x4d, 0x45,
      		0x29, 0x2f, 0x73, 0x72, 0x63, 0x2f, 0x68, 0x6d, 0x69, 0x2f, 0x70, 0x6c, 0x75, 0x67, 0x69, 0x6e,
      		0x73, 0x61, 0x75, 0x78, 0x2f, 0x70, 0x6c, 0x75, 0x67, 0x69, 0x6e, 0x73, 0x61, 0x75, 0x78, 0x2e,
      		0x70, 0x72, 0x69, 0x29, 0x31, 0x0d, 0x0a, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x09, 0x28,
      		0x24, 0x28, 0x49, 0x43, 0x43, 0x53, 0x44, 0x45, 0x56, 0x48, 0x4f, 0x4d, 0x45, 0x29, 0x2f, 0x73,
      		0x72, 0x63, 0x2f, 0x68, 0x6d, 0x69, 0x2f, 0x70, 0x6c, 0x75, 0x67, 0x69, 0x6e, 0x73, 0x61, 0x75,
      		0x78, 0x2f, 0x70, 0x6c, 0x75, 0x67, 0x69, 0x6e, 0x73, 0x61, 0x75, 0x78, 0x2e, 0x70, 0x72, 0x69,
      		0x29, 0x32, 0x0d, 0x0a, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x09, 0x28, 0x24, 0x28, 0x49,
      		0x43, 0x43, 0x53, 0x44, 0x45, 0x56, 0x48, 0x4f, 0x4d, 0x45, 0x29, 0x2f, 0x73, 0x72, 0x63, 0x2f,
      		0x68, 0x6d, 0x69, 0x2f, 0x70, 0x6c, 0x75, 0x67, 0x69, 0x6e, 0x73, 0x61, 0x75, 0x78, 0x2f, 0x70,
      		0x6c, 0x75, 0x67, 0x69, 0x6e, 0x73, 0x61, 0x75, 0x78, 0x2e, 0x70, 0x72, 0x69, 0x29, 0x33,
      	};
      	const size_t nPacketLen = sizeof(packet);
      
      	// Receive buffer, pre-loaded with the whole packet.
      	Juint8 readBuffer[1024] = {};
      	memcpy(readBuffer, packet, nPacketLen);
      
      	// The first `consumed` bytes count as already processed; `remaining`
      	// is what is left and must end up at the start of readBuffer.
      	int consumed = 49;
      	size_t remaining = nPacketLen;
      	remaining -= consumed;
      
      	// Reference copy of the tail, taken into a separate array before the
      	// receive buffer is modified.
      	Juint8 expected[1024] = {};
      	memcpy(expected, readBuffer + consumed, remaining);
      
      	// Shift the tail down in place with the selected routine.
      	if (copyType == eMemcpy)
      	{
      		// Source and destination lie in the same array and overlap; this
      		// call is the case the demo exists to exercise.
      		memcpy(readBuffer, readBuffer + consumed, remaining);
      	}
      	else if (copyType == eMemmove)
      	{
      		memmove(readBuffer, readBuffer + consumed, remaining);
      	}
      	else if (copyType == eByteCopy)
      	{
      		// Ascending, one byte at a time: each source byte is read before
      		// the write cursor (which trails it by `consumed`) reaches it.
      		Juint8* out = readBuffer;
      		const Juint8* in = readBuffer + consumed;
      		for (size_t left = remaining; left > 0; --left)
      		{
      			*out++ = *in++;
      		}
      	}
      
      	// Any difference from the reference copy means the shift lost data.
      	if (memcmp(readBuffer, expected, remaining) != 0)
      	{
      		std::cout << "memory corrupted!" << std::endl;
      		return false;
      	}
      
      	std::cout << "Copy success!" << std::endl;
      	return true;
      }
      
      
      // Entry point: runs the overlap demo with the memcpy variant.
      // The outcome is reported on std::cout by memoryCopyTest(); its return
      // value is not used and the exit status is always 0.
      int main(int argc, char** argv)
      {
      	static_cast<void>(argc);
      	static_cast<void>(argv);
      
      	static_cast<void>(memoryCopyTest(eMemcpy));
      	return 0;
      }
      

memmove(glibc-2.32)

/* Copy LEN bytes from SRC to DEST, choosing the copy direction from the
   relative position of the two regions, then pass DEST to RETURN.

   NOTE(review): rettype, inhibit_loop_to_libcall, a1/a2, a1const/a2const,
   RETURN, OP_T_THRES, OPSIZ and the *_COPY_* macros are not defined in
   this listing.  Since the body refers to `dest' and `src', a1 and a2
   presumably expand to those names -- confirm against the full glibc
   source.  */
rettype
inhibit_loop_to_libcall
MEMMOVE (a1const void *a1, a2const void *a2, size_t len)
{
  /* Hold both addresses as integers so they can be subtracted and used
     in alignment arithmetic.  */
  unsigned long int dstp = (long int) dest;
  unsigned long int srcp = (long int) src;

  /* This test makes the forward copying code be used whenever possible.
     Reduces the working set.

     DSTP and SRCP are unsigned, so DSTP - SRCP wraps around to a very
     large value when DEST lies below SRC; that case therefore takes the
     forward branch.  The backward branch is taken only when DEST lies at
     or above SRC by fewer than LEN bytes, which is the layout in which a
     forward copy could overwrite source bytes before reading them.  */
  if (dstp - srcp >= len)	/* *Unsigned* compare!  */
    {
      /* Copy from the beginning to the end.  */

      /* When this macro is true the forward case is delegated to memcpy;
	 otherwise the forward copy is written out below.  */
#if MEMCPY_OK_FOR_FWD_MEMMOVE
      dest = memcpy (dest, src, len);
#else
      /* If there are not too few bytes to copy, use word copy.  */
      if (len >= OP_T_THRES)
	{
	  /* Copy just a few bytes to make DSTP aligned.  (-dstp) % OPSIZ
	     is the distance up to the next multiple of OPSIZ, assuming
	     OPSIZ is a power of two.  */
	  len -= (-dstp) % OPSIZ;
	  BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ);

	  /* Copy whole pages from SRCP to DSTP by virtual address
	     manipulation, as much as possible.  */

	  PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len);

	  /* Copy from SRCP to DSTP taking advantage of the known
	     alignment of DSTP.  Number of bytes remaining is put
	     in the third argument, i.e. in LEN.  This number may
	     vary from machine to machine.  */

	  WORD_COPY_FWD (dstp, srcp, len, len);

	  /* Fall out and copy the tail.  */
	}

      /* There are just a few bytes to copy.  Use byte memory operations.  */
      BYTE_COPY_FWD (dstp, srcp, len);
#endif /* MEMCPY_OK_FOR_FWD_MEMMOVE */
    }
  else
    {
      /* Copy from the end to the beginning.  Both cursors are first moved
	 to one past the last byte of their regions.  */
      srcp += len;
      dstp += len;

      /* If there are not too few bytes to copy, use word copy.  */
      if (len >= OP_T_THRES)
	{
	  /* Copy just a few bytes to make DSTP aligned.  Going downward,
	     dstp % OPSIZ is the number of bytes above the previous
	     multiple of OPSIZ.  */
	  len -= dstp % OPSIZ;
	  BYTE_COPY_BWD (dstp, srcp, dstp % OPSIZ);

	  /* Copy from SRCP to DSTP taking advantage of the known
	     alignment of DSTP.  Number of bytes remaining is put
	     in the third argument, i.e. in LEN.  This number may
	     vary from machine to machine.  */

	  WORD_COPY_BWD (dstp, srcp, len, len);

	  /* Fall out and copy the tail.  */
	}

      /* There are just a few bytes to copy.  Use byte memory operations.  */
      BYTE_COPY_BWD (dstp, srcp, len);
    }

  RETURN (dest);
}

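可以看到,memmove在拷贝前进行了一次无符号比较(dstp - srcp >= len):对于情形2(目的地址高于源地址且存在重叠),差值小于len,进入Copy from the end to the beginning分支,从高地址向低地址拷贝;对于情形4(目的地址低于源地址且存在重叠),无符号减法回绕为一个很大的值,进入正向拷贝分支,此时从低地址向高地址拷贝不会在读取之前覆盖源数据。这样两种重叠情形都得到了正确处理;正向拷贝分支的实现与memcpy相同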

为什么不都用memmove代替memcpy?

  • 内存重叠可能并没有那么明显,支持者认为memcpy带来的是Undefined行为

    • 对于情形4,常规认知上正序拷贝其实能正确处理内存覆盖情形,但实际上其行为与glibc的版本和更新有关,如:

      真的未必。Linus 说的那一大段,当时背景就是 adobe flash player 里有一些该使用 memmove 的地方误用了 memcpy,glibc 某一次升级后暴露了 flash 的这个问题,导致 flash 在 Linux 下面播放音频有杂音。

  • 反对者认为memmove额外的覆盖判断降低了效率

上一篇:C语言中memcpy用法的注意点


下一篇:memcpy的实现