qemu QOM(qemu object model)和设备模拟

本文所用qemu为1.5版本的,不是android emulator的。

之前几篇文章介绍的都是android emulator中的设备模拟。一些是android自己加的platform bus上的虚拟设备;一些是qemu自己的虚拟设备,但是这部分代码很旧,没有使用到QOM模型。

qemu1.1以及之后的qemu开始有了QOM模型。QOM很大一部分代码是为了实现了C++的继承,公用的东西放到ObjectClass里,只有一个实例;其他的放Object里,可以有多个实例。

PS:android emulator的代码对应了qemu 0.1x的代码,但是又有一些新版本的qemu的代码porting上去了。


QOM设备模型可以看:

1、Qemu中的设备注册:http://ytliu.info/blog/2015/01/10/qemushe-bei-chu-shi-hua/

2、QEMU 设备模拟:http://mnstory.net/wp-content/uploads/2014/10/qemu-device-simulation/qemu-device-simulation.pdf


第二篇pdf讲的非常详细了,但是最后关于PMIO地址和读写函数如何对应起来的,还是有些没清楚的地方。

本文针对这个问题进行一些补充。


初始化内存空间

在memory_map_init(main->cpu_exec_init_all->memory_map_init)中,会设置MemoryRegion改变时的回调函数,memory_map_init是在设备注册之前调用的:

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, "system", INT64_MAX);
    address_space_init(&address_space_memory, system_memory);
    address_space_memory.name = "memory";

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init(system_io, "io", 65536);
    address_space_init(&address_space_io, system_io);
    address_space_io.name = "I/O";

    memory_listener_register(&core_memory_listener, &address_space_memory);
    memory_listener_register(&io_memory_listener, &address_space_io);
    memory_listener_register(&tcg_memory_listener, &address_space_memory);

    dma_context_init(&dma_context_memory, &address_space_memory,
                     NULL, NULL, NULL);
}


这个是PMIO的listener,PMIO的MemoryRegion改变后,会调用io_region_add函数,映射PMIO地址和设备读写函数。

普通内存是其他的listener。

static MemoryListener io_memory_listener = {
    .region_add = io_region_add,
    .region_del = io_region_del,
    .priority = 0,
};

把注册的listener添加到全局的memory_listeners链表中:

void memory_listener_register(MemoryListener *listener, AddressSpace *filter)
{
    MemoryListener *other = NULL;
    AddressSpace *as;

    listener->address_space_filter = filter; // listener是处理那个AddressSpace的
    if (QTAILQ_EMPTY(&memory_listeners)
        || listener->priority >= QTAILQ_LAST(&memory_listeners,
                                             memory_listeners)->priority) {
        QTAILQ_INSERT_TAIL(&memory_listeners, listener, link);
    } else {
        QTAILQ_FOREACH(other, &memory_listeners, link) {
            if (listener->priority < other->priority) {
                break;
            }
        }
        QTAILQ_INSERT_BEFORE(other, listener, link);
    }

    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
        listener_add_address_space(listener, as);
    }
}


对AddressSpace(也就是根MemoryRegion)中的每一个MemoryRegion进行一下listener->region_add

static void listener_add_address_space(MemoryListener *listener,
                                       AddressSpace *as)
{
    FlatRange *fr;

    if (listener->address_space_filter
        && listener->address_space_filter != as) {
        return;
    }

    if (global_dirty_log) {
        if (listener->log_global_start) {
            listener->log_global_start(listener);
        }
    }

    FOR_EACH_FLAT_RANGE(fr, as->current_map) {
        MemoryRegionSection section = {
            .mr = fr->mr,
            .address_space = as,
            .offset_within_region = fr->offset_in_region,
            .size = int128_get64(fr->addr.size),
            .offset_within_address_space = int128_get64(fr->addr.start),
            .readonly = fr->readonly,
        };
        if (listener->region_add) {
            listener->region_add(listener, secion);
        }
    }
}

添加PIT设备

MemoryRegion有修改,更新,调用io_region_add函数

添加pit设备时,会调用到memory_region_add_subregion函数,MemoryRegion被修改了,然后会调用到memory_region_transaction_commit函数,更新地址空间,调用listener,是在这里映射PMIO地址和设备读写函数的。

注意memory_region_transaction_depth的使用,保证多层调用时,只需要更新一次。

void memory_region_transaction_commit(void)
{
    AddressSpace *as;

    assert(memory_region_transaction_depth);
    --memory_region_transaction_depth;
    if (!memory_region_transaction_depth && memory_region_update_pending) {
        memory_region_update_pending = false;
        MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);

        QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
            address_space_update_topology(as);
        }

        MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
    }
}


真正的更新操作:

static void address_space_update_topology(AddressSpace *as)
{
    FlatView old_view = *as->current_map;
    FlatView new_view = generate_memory_topology(as->root);

    address_space_update_topology_pass(as, old_view, new_view, false);
    address_space_update_topology_pass(as, old_view, new_view, true);

    *as->current_map = new_view;
    flatview_destroy(&old_view);
    address_space_update_ioeventfds(as);
}


static FlatView generate_memory_topology(MemoryRegion *mr)
{
    FlatView view;

    flatview_init(&view);

    if (mr) {
        render_memory_region(&view, mr, int128_zero(),
                             addrrange_make(int128_zero(), int128_2_64()), false);
    }
    flatview_simplify(&view);

    return view;
}



FlatView有点像把链表描述的MemoryRegion搞成了FlatRange数组(FlatRange中记录了MemoryRegion,自然可以获得MemoryRegion中的pit_ioport_ops):
static void render_memory_region(FlatView *view,
                                 MemoryRegion *mr,
                                 Int128 base,
                                 AddrRange clip,
                                 bool readonly)
{
    MemoryRegion *subregion;
    unsigned i;
    hwaddr offset_in_region;
    Int128 remain;
    Int128 now;
    FlatRange fr;
    AddrRange tmp;

    if (!mr->enabled) {
        return;
    }

    int128_addto(&base, int128_make64(mr->addr));
    readonly |= mr->readonly;

    tmp = addrrange_make(base, mr->size);

    if (!addrrange_intersects(tmp, clip)) {
        return;
    }

    clip = addrrange_intersection(tmp, clip);

    if (mr->alias) {
        int128_subfrom(&base, int128_make64(mr->alias->addr));
        int128_subfrom(&base, int128_make64(mr->alias_offset));
        render_memory_region(view, mr->alias, base, clip, readonly);
        return;
    }

    /* Render subregions in priority order. */
    QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) {
        render_memory_region(view, subregion, base, clip, readonly);
    }

    if (!mr->terminates) {
        return;
    }

    offset_in_region = int128_get64(int128_sub(clip.start, base));
    base = clip.start;
    remain = clip.size;

    /* Render the region itself into any gaps left by the current view. */
    for (i = 0; i < view->nr && int128_nz(remain); ++i) {
        if (int128_ge(base, addrrange_end(view->ranges[i].addr))) {
            continue;
        }
        if (int128_lt(base, view->ranges[i].addr.start)) {
            now = int128_min(remain,
                             int128_sub(view->ranges[i].addr.start, base));
            fr.mr = mr;
            fr.offset_in_region = offset_in_region;
            fr.addr = addrrange_make(base, now);
            fr.dirty_log_mask = mr->dirty_log_mask;
            fr.readable = mr->readable;
            fr.readonly = readonly;
            flatview_insert(view, i, &fr);
            ++i;
            int128_addto(&base, now);
            offset_in_region += int128_get64(now);
            int128_subfrom(&remain, now);
        }
        now = int128_sub(int128_min(int128_add(base, remain),
                                    addrrange_end(view->ranges[i].addr)),
                         base);
        int128_addto(&base, now);
        offset_in_region += int128_get64(now);
        int128_subfrom(&remain, now);
    }
    if (int128_nz(remain)) {
        fr.mr = mr;
        fr.offset_in_region = offset_in_region;
        fr.addr = addrrange_make(base, remain);
        fr.dirty_log_mask = mr->dirty_log_mask;
        fr.readable = mr->readable;
        fr.readonly = readonly;
        flatview_insert(view, i, &fr);
    }
}


MemoryRegion被修改的话,如果是有添加,那么会调用到MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_add),添加PMIO地址和设备的映射关系。

static void address_space_update_topology_pass(AddressSpace *as,
                                               FlatView old_view,
                                               FlatView new_view,
                                               bool adding)
{
    unsigned iold, inew;
    FlatRange *frold, *frnew;

    /* Generate a symmetric difference of the old and new memory maps.
     * Kill ranges in the old map, and instantiate ranges in the new map.
     */
    iold = inew = 0;
    while (iold < old_view.nr || inew < new_view.nr) {
        if (iold < old_view.nr) {
            frold = &old_view.ranges[iold];
        } else {
            frold = NULL;
        }
        if (inew < new_view.nr) {
            frnew = &new_view.ranges[inew];
        } else {
            frnew = NULL;
        }

        if (frold
            && (!frnew
                || int128_lt(frold->addr.start, frnew->addr.start)
                || (int128_eq(frold->addr.start, frnew->addr.start)
                    && !flatrange_equal(frold, frnew)))) {
            /* In old, but (not in new, or in new but attributes changed). */

            if (!adding) {
                MEMORY_LISTENER_UPDATE_REGION(frold, as, Reverse, region_del);
            }

            ++iold;
        } else if (frold && frnew && flatrange_equal(frold, frnew)) {
            /* In both (logging may have changed) */

            if (adding) {
                MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_nop);
                if (frold->dirty_log_mask && !frnew->dirty_log_mask) {
                    MEMORY_LISTENER_UPDATE_REGION(frnew, as, Reverse, log_stop);
                } else if (frnew->dirty_log_mask && !frold->dirty_log_mask) {
                    MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, log_start);
                }
            }

            ++iold;
            ++inew;
        } else {
            /* In new */

            if (adding) {
                MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_add);
            }

            ++inew;
        }
    }
}


#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback)            \
    MEMORY_LISTENER_CALL(callback, dir, (&(MemoryRegionSection) {       \
        .mr = (fr)->mr,                                                 \
        .address_space = (as),                                          \
        .offset_within_region = (fr)->offset_in_region,                 \
        .size = int128_get64((fr)->addr.size),                          \
        .offset_within_address_space = int128_get64((fr)->addr.start),  \
        .readonly = (fr)->readonly,                                     \
              }))


调用了region_add函数,也就是io_region_add函数:

#define MEMORY_LISTENER_CALL(_callback, _direction, _section, _args...) \
    do {                                                                \
        MemoryListener *_listener;                                      \
                                                                        \
        switch (_direction) {                                           \
        case Forward:                                                   \
            QTAILQ_FOREACH(_listener, &memory_listeners, link) {        \
                if (_listener->_callback                                \
                    && memory_listener_match(_listener, _section)) {    \
                    _listener->_callback(_listener, _section, ##_args); \
                }                                                       \
            }                                                           \
            break;                                                      \
        case Reverse:                                                   \
            QTAILQ_FOREACH_REVERSE(_listener, &memory_listeners,        \
                                   memory_listeners, link) {            \
                if (_listener->_callback                                \
                    && memory_listener_match(_listener, _section)) {    \
                    _listener->_callback(_listener, _section, ##_args); \
                }                                                       \
            }                                                           \
            break;                                                      \
        default:                                                        \
            abort();                                                    \
        }                                                               \
    } while (0)


io_region_add函数处理PMIO地址和设备读写函数的映射关系

static void io_region_add(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);

    mrio->mr = section->mr;
    mrio->offset = section->offset_within_region;
    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
                 section->offset_within_address_space, section->size);
    ioport_register(&mrio->iorange);
}


memory_region_iorange_ops就是IORange->ops,其中读函数为memory_region_iorange_read,真正的设备读写函数保存在mr中:

static void memory_region_iorange_read(IORange *iorange,
                                       uint64_t offset,
                                       unsigned width,
                                       uint64_t *data)
{
    MemoryRegionIORange *mrio
        = container_of(iorange, MemoryRegionIORange, iorange);
    MemoryRegion *mr = mrio->mr;

    offset += mrio->offset;
    if (mr->ops->old_portio) {
        const MemoryRegionPortio *mrp = find_portio(mr, offset - mrio->offset,
                                                    width, false);

        *data = ((uint64_t)1 << (width * 8)) - 1;
        if (mrp) {
            *data = mrp->read(mr->opaque, offset);
        } else if (width == 2) {
            mrp = find_portio(mr, offset - mrio->offset, 1, false);
            assert(mrp);
            *data = mrp->read(mr->opaque, offset) |
                    (mrp->read(mr->opaque, offset + 1) << 8);
        }
        return;
    }
    *data = 0;
    access_with_adjusted_size(offset, data, width,
                              mr->ops->impl.min_access_size,
                              mr->ops->impl.max_access_size,
                              memory_region_read_accessor, mr);
}


这里执行真正的读写函数,也就是pit_ioport_ops。

static void memory_region_write_accessor(void *opaque,
                                         hwaddr addr,
                                         uint64_t *value,
                                         unsigned size,
                                         unsigned shift,
                                         uint64_t mask)
{
    MemoryRegion *mr = opaque;
    uint64_t tmp;

    if (mr->flush_coalesced_mmio) {
        qemu_flush_coalesced_mmio_buffer();
    }
    tmp = (*value >> shift) & mask;
    mr->ops->write(mr->opaque, addr, tmp, size);
}



注册设备的三组读写函数:

void ioport_register(IORange *ioport)
{
    register_ioport_read(ioport->base, ioport->len, 1,
                         ioport_readb_thunk, ioport);
    register_ioport_read(ioport->base, ioport->len, 2,
                         ioport_readw_thunk, ioport);
    register_ioport_read(ioport->base, ioport->len, 4,
                         ioport_readl_thunk, ioport);
    register_ioport_write(ioport->base, ioport->len, 1,
                          ioport_writeb_thunk, ioport);
    register_ioport_write(ioport->base, ioport->len, 2,
                          ioport_writew_thunk, ioport);
    register_ioport_write(ioport->base, ioport->len, 4,
                          ioport_writel_thunk, ioport);
    ioport_destructor_table[ioport->base] = iorange_destructor_thunk;
}


在这里注册了PMIO地址对应的读函数是ioport_readb_thunk,这就是一个壳。

int register_ioport_read(pio_addr_t start, int length, int size,
                         IOPortReadFunc *func, void *opaque)
{
    int i, bsize;

    if (ioport_bsize(size, &bsize)) {
        hw_error("register_ioport_read: invalid size");
        return -1;
    }
    for(i = start; i < start + length; ++i) {
        ioport_read_table[bsize][i] = func;
        if (ioport_opaque[i] != NULL && ioport_opaque[i] != opaque)
            hw_error("register_ioport_read: invalid opaque for address 0x%x",
                     i);
        ioport_opaque[i] = opaque;
    }
    return 0;
}

通过opaque可以获取读函数IORange->ops->read(也就是memory_region_iorange_read)。

static uint32_t ioport_readb_thunk(void *opaque, uint32_t addr)
{
    IORange *ioport = opaque;
    uint64_t data;

    ioport->ops->read(ioport, addr - ioport->base, 1, &data);
    return data;
}


对于读PMIO,KVM_EXIT_IO之后的流程是:

kvm_handle_io

    ->cpu_inb

        ->ioport_read

            ->ioport_read_table[0][addr](也就是ioport_readb_thunk)

                ->memory_region_iorange_ops(也就是IORange->ops)

                    ->access_with_adjusted_size(需要mr,保存了pit_ioport_ops)

                        ->memory_region_read_accessor

                            ->mr-ops


PS:

1、Object的parent可能是用来搞总线结构的,比如Object是bus上的设备,parent是bus。

2、ObjectProperty里面type为child<的应该就是Object用来记录子Object的,也就是bus记录上面挂的设备的。

3、注意QObject和QType(比C语言的type多了ref),用来折腾ObjectProperty的属性设置的,和之前的Object,ObjectClass不同。

4、设置属性都是通过object_property_set_qobject来设置的,会生成visitor,然后调用void object_property_set(Object *obj, Visitor *v, const char *name,
                         Error **errp)。

5、isa_create中创建了Object,调用了pit_class_initfn等初始化函数。

6、isa bus的address_space_io就是系统的system_io:

static void pc_init_isa(QEMUMachineInitArgs *args)
{
    ram_addr_t ram_size = args->ram_size;
    const char *cpu_model = args->cpu_model;
    const char *kernel_filename = args->kernel_filename;
    const char *kernel_cmdline = args->kernel_cmdline;
    const char *initrd_filename = args->initrd_filename;
    const char *boot_device = args->boot_device;
    has_pvpanic = false;
    if (cpu_model == NULL)
        cpu_model = "486";
    disable_kvm_pv_eoi();
    enable_compat_apic_id_mode();
    pc_init1(get_system_memory(),
             get_system_io(),
             ram_size, boot_device,
             kernel_filename, kernel_cmdline,
             initrd_filename, cpu_model, 0, 1);
}


pc_init1中:

if (pci_enabled) {
        pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi,
                              system_memory, system_io, ram_size,
                              below_4g_mem_size,
                              0x100000000ULL - below_4g_mem_size,
                              0x100000000ULL + above_4g_mem_size,
                              (sizeof(hwaddr) == 4
                               ? 0
                               : ((uint64_t)1 << 62)),
                              pci_memory, ram_memory);
    } else {
        pci_bus = NULL;
        i440fx_state = NULL;
        isa_bus = isa_bus_new(NULL, system_io);
        no_hpet = 1;
    }





上一篇:游戏的运行逻辑分析 | Python从入门到精通:入门篇之二十二


下一篇:飞天加速计划·高校学生在家实践——ECS服务器初体验