Discussion:
[PATCH v2 0/5] hpet 'driftfix': alleviate time drift with HPET periodic timers
(too old to reply)
Ulrich Obergfell
2011-04-08 15:20:37 UTC
Permalink
Hi,

This is version 2 of a series of patches that I originally posted in:

http://lists.gnu.org/archive/html/qemu-devel/2011-03/msg01989.html
http://lists.gnu.org/archive/html/qemu-devel/2011-03/msg01992.html
http://lists.gnu.org/archive/html/qemu-devel/2011-03/msg01991.html
http://lists.gnu.org/archive/html/qemu-devel/2011-03/msg01990.html

http://article.gmane.org/gmane.comp.emulators.kvm.devel/69325
http://article.gmane.org/gmane.comp.emulators.kvm.devel/69326
http://article.gmane.org/gmane.comp.emulators.kvm.devel/69327
http://article.gmane.org/gmane.comp.emulators.kvm.devel/69328

I implemented the proposed changes (remove configurability and ifdefs,
define 'driftfix' as a property of the HPET device) and I've split the
patch into smaller parts (originally 3, now 5). I also included a fix
related to scaling 't->irqs_to_inject' when the guest o/s modifies the
comparator register value.

Please review and please comment.

Regards,

Uli


Ulrich Obergfell (5):
hpet 'driftfix': add hooks required to detect coalesced interrupts
(x86 apic only)
hpet 'driftfix': add driftfix property to HPETState and DeviceInfo
hpet 'driftfix': add fields to HPETTimer and VMStateDescription
hpet 'driftfix': add code in update_irq() to detect coalesced
interrupts (x86 apic only)
hpet 'driftfix': add code in hpet_timer() to compensate delayed
callbacks and coalesced interrupts

hw/apic.c | 4 +++
hw/hpet.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
sysemu.h | 3 ++
vl.c | 3 ++
4 files changed, 95 insertions(+), 5 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Ulrich Obergfell
2011-04-08 15:20:39 UTC
Permalink
driftfix is a 'bit type' property. Compensation of delayed callbacks
and coalesced interrupts can be enabled with the command line option

-global hpet.driftfix=on

driftfix is 'off' (disabled) by default.

Signed-off-by: Ulrich Obergfell <***@redhat.com>
---
hw/hpet.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/hw/hpet.c b/hw/hpet.c
index e92b775..45847ed 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -72,6 +72,8 @@ typedef struct HPETState {
uint64_t isr; /* interrupt status reg */
uint64_t hpet_counter; /* main counter */
uint8_t hpet_id; /* instance id */
+
+ uint32_t driftfix;
} HPETState;

static uint32_t hpet_in_legacy_mode(HPETState *s)
@@ -738,6 +740,7 @@ static SysBusDeviceInfo hpet_device_info = {
.qdev.props = (Property[]) {
DEFINE_PROP_UINT8("timers", HPETState, num_timers, HPET_MIN_TIMERS),
DEFINE_PROP_BIT("msi", HPETState, flags, HPET_MSI_SUPPORT, false),
+ DEFINE_PROP_BIT("driftfix", HPETState, driftfix, 0, false),
DEFINE_PROP_END_OF_LIST(),
},
};
--
1.6.2.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Ulrich Obergfell
2011-04-08 15:20:38 UTC
Permalink
'target_get_irq_delivered' and 'target_reset_irq_delivered' contain
entry addresses of functions that are utilized by update_irq() to
detect coalesced interrupts. apic code loads these pointers during
initialization.

Signed-off-by: Ulrich Obergfell <***@redhat.com>
---
hw/apic.c | 4 ++++
sysemu.h | 3 +++
vl.c | 3 +++
3 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 00907e0..44d8cb3 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -24,6 +24,7 @@
#include "sysbus.h"
#include "trace.h"
#include "kvm.h"
+#include "sysemu.h"

/* APIC Local Vector Table */
#define APIC_LVT_TIMER 0
@@ -1143,6 +1144,9 @@ static SysBusDeviceInfo apic_info = {

static void apic_register_devices(void)
{
+ target_get_irq_delivered = apic_get_irq_delivered;
+ target_reset_irq_delivered = apic_reset_irq_delivered;
+
sysbus_register_withprop(&apic_info);
}

diff --git a/sysemu.h b/sysemu.h
index 3f7e17e..b5d4f90 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -96,6 +96,9 @@ int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f);
void qemu_savevm_state_cancel(Monitor *mon, QEMUFile *f);
int qemu_loadvm_state(QEMUFile *f);

+extern int (*target_get_irq_delivered)(void);
+extern void (*target_reset_irq_delivered)(void);
+
/* SLIRP */
void do_info_slirp(Monitor *mon);

diff --git a/vl.c b/vl.c
index 8bcf2ae..548912a 100644
--- a/vl.c
+++ b/vl.c
@@ -231,6 +231,9 @@ const char *prom_envs[MAX_PROM_ENVS];
const char *nvram = NULL;
int boot_menu;

+int (*target_get_irq_delivered)(void) = 0;
+void (*target_reset_irq_delivered)(void) = 0;
+
typedef struct FWBootEntry FWBootEntry;

struct FWBootEntry {
--
1.6.2.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Anthony Liguori
2011-04-08 15:46:30 UTC
Permalink
Post by Ulrich Obergfell
'target_get_irq_delivered' and 'target_reset_irq_delivered' contain
entry addresses of functions that are utilized by update_irq() to
detect coalesced interrupts. apic code loads these pointers during
initialization.
---
hw/apic.c | 4 ++++
sysemu.h | 3 +++
vl.c | 3 +++
3 files changed, 10 insertions(+), 0 deletions(-)
diff --git a/hw/apic.c b/hw/apic.c
index 00907e0..44d8cb3 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -24,6 +24,7 @@
#include "sysbus.h"
#include "trace.h"
#include "kvm.h"
+#include "sysemu.h"
/* APIC Local Vector Table */
#define APIC_LVT_TIMER 0
@@ -1143,6 +1144,9 @@ static SysBusDeviceInfo apic_info = {
static void apic_register_devices(void)
{
+ target_get_irq_delivered = apic_get_irq_delivered;
+ target_reset_irq_delivered = apic_reset_irq_delivered;
+
sysbus_register_withprop(&apic_info);
}
diff --git a/sysemu.h b/sysemu.h
index 3f7e17e..b5d4f90 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -96,6 +96,9 @@ int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f);
void qemu_savevm_state_cancel(Monitor *mon, QEMUFile *f);
int qemu_loadvm_state(QEMUFile *f);
+extern int (*target_get_irq_delivered)(void);
+extern void (*target_reset_irq_delivered)(void);
It's a bit nicer to use a registration function instead of setting a
global function pointer directly.

Regards,

Anthony Liguori
Post by Ulrich Obergfell
+
/* SLIRP */
void do_info_slirp(Monitor *mon);
diff --git a/vl.c b/vl.c
index 8bcf2ae..548912a 100644
--- a/vl.c
+++ b/vl.c
@@ -231,6 +231,9 @@ const char *prom_envs[MAX_PROM_ENVS];
const char *nvram = NULL;
int boot_menu;
+int (*target_get_irq_delivered)(void) = 0;
+void (*target_reset_irq_delivered)(void) = 0;
+
typedef struct FWBootEntry FWBootEntry;
struct FWBootEntry {
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Jan Kiszka
2011-04-08 15:51:51 UTC
Permalink
Post by Ulrich Obergfell
'target_get_irq_delivered' and 'target_reset_irq_delivered' contain
entry addresses of functions that are utilized by update_irq() to
detect coalesced interrupts. apic code loads these pointers during
initialization.
This interface is intended as a temporary helper to allow establishing
the driftfix infrastructure, right? Then please leave proper comments
behind that it will die once we have a real feedback path for IRQ
delivery (that also takes routing into account).

Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Ulrich Obergfell
2011-04-08 15:20:42 UTC
Permalink
Loss of periodic timer interrupts caused by delayed callbacks and by
interrupt coalescing is compensated by gradually injecting additional
interrupts during subsequent timer intervals, starting at a rate of
one additional interrupt per interval. If further interrupts are lost
while compensation is in progress, the rate is increased. A limit is
imposed on the rate and on the 'backlog' of lost interrupts that are
to be injected. If a guest o/s modifies the comparator register value
while compensation is in progress, the 'backlog' of lost interrupts
that are to be injected is scaled to the new value.

Injecting additional timer interrupts to compensate lost interrupts
can alleviate long term time drift. However, on a short time scale,
this method can have the side effect of making virtual machine time
intermittently pass faster than real time (depending on the guest's
time keeping algorithm). Compensation is disabled by default and can
be enabled for guests where this behaviour is acceptable.

Signed-off-by: Ulrich Obergfell <***@redhat.com>
---
hw/hpet.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 57 insertions(+), 1 deletions(-)

diff --git a/hw/hpet.c b/hw/hpet.c
index b5625fb..5a25a96 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -42,6 +42,9 @@

#define HPET_MSI_SUPPORT 0

+#define MAX_IRQS_TO_INJECT (uint32_t)5000
+#define MAX_IRQ_RATE (uint32_t)10
+
struct HPETState;
typedef struct HPETTimer { /* timers */
uint8_t tn; /*timer number*/
@@ -309,8 +312,11 @@ static const VMStateDescription vmstate_hpet = {
static void hpet_timer(void *opaque)
{
HPETTimer *t = opaque;
+ HPETState *s = t->state;
uint64_t diff;

+ int irq_delivered = 0;
+ uint32_t irq_count = 0;
uint64_t period = t->period;
uint64_t cur_tick = hpet_get_ticks(t->state);

@@ -318,13 +324,36 @@ static void hpet_timer(void *opaque)
if (t->config & HPET_TN_32BIT) {
while (hpet_time_after(cur_tick, t->cmp)) {
t->cmp = (uint32_t)(t->cmp + t->period);
+ irq_count++;
}
} else {
while (hpet_time_after64(cur_tick, t->cmp)) {
t->cmp += period;
+ irq_count++;
}
}
diff = hpet_calculate_diff(t, cur_tick);
+ if (s->driftfix) {
+ if (t->saved_period != t->period) {
+ uint64_t tmp = (t->irqs_to_inject * t->saved_period)
+ + t->ticks_not_accounted;
+ t->irqs_to_inject = tmp / t->period;
+ t->ticks_not_accounted = tmp % t->period;
+ t->saved_period = t->period;
+ }
+ t->irqs_to_inject += irq_count;
+ t->irqs_to_inject = MIN(t->irqs_to_inject, MAX_IRQS_TO_INJECT);
+ if (t->irqs_to_inject > 1) {
+ if (irq_count > 1) {
+ t->irq_rate++;
+ t->irq_rate = MIN(t->irq_rate, MAX_IRQ_RATE);
+ }
+ if (irq_count || t->divisor == 0) {
+ t->divisor = t->irq_rate;
+ }
+ diff /= t->divisor--;
+ }
+ }
qemu_mod_timer(t->qemu_timer,
qemu_get_clock(vm_clock) + (int64_t)ticks_to_ns(diff));
} else if (t->config & HPET_TN_32BIT && !timer_is_periodic(t)) {
@@ -335,7 +364,22 @@ static void hpet_timer(void *opaque)
t->wrap_flag = 0;
}
}
- update_irq(t, 1);
+ if (s->driftfix && timer_is_periodic(t) && period != 0) {
+ if (t->irqs_to_inject) {
+ irq_delivered = update_irq(t, 1);
+ if (irq_delivered) {
+ t->irq_rate = MIN(t->irq_rate, t->irqs_to_inject);
+ t->irqs_to_inject--;
+ } else {
+ if (irq_count) {
+ t->irq_rate++;
+ t->irq_rate = MIN(t->irq_rate, MAX_IRQ_RATE);
+ }
+ }
+ }
+ } else {
+ update_irq(t, 1);
+ }
}

static void hpet_set_timer(HPETTimer *t)
@@ -674,6 +718,12 @@ static void hpet_reset(DeviceState *d)
timer->config |= 0x00000004ULL << 32;
timer->period = 0ULL;
timer->wrap_flag = 0;
+
+ timer->saved_period = 0;
+ timer->ticks_not_accounted = 0;
+ timer->irqs_to_inject = 0;
+ timer->irq_rate = 1;
+ timer->divisor = 1;
}

s->hpet_counter = 0ULL;
@@ -734,6 +784,12 @@ static int hpet_init(SysBusDevice *dev)
timer->qemu_timer = qemu_new_timer(vm_clock, hpet_timer, timer);
timer->tn = i;
timer->state = s;
+
+ timer->saved_period = 0;
+ timer->ticks_not_accounted = 0;
+ timer->irqs_to_inject = 0;
+ timer->irq_rate = 1;
+ timer->divisor = 1;
}

/* 64-bit main counter; LegacyReplacementRoute. */
--
1.6.2.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Jan Kiszka
2011-04-08 15:54:52 UTC
Permalink
Post by Ulrich Obergfell
Loss of periodic timer interrupts caused by delayed callbacks and by
interrupt coalescing is compensated by gradually injecting additional
interrupts during subsequent timer intervals, starting at a rate of
one additional interrupt per interval. If further interrupts are lost
while compensation is in progress, the rate is increased. A limit is
imposed on the rate and on the 'backlog' of lost interrupts that are
to be injected. If a guest o/s modifies the comparator register value
while compensation is in progress, the 'backlog' of lost interrupts
that are to be injected is scaled to the new value.
Injecting additional timer interrupts to compensate lost interrupts
can alleviate long term time drift. However, on a short time scale,
this method can have the side effect of making virtual machine time
intermittently pass faster than real time (depending on the guest's
time keeping algorithm). Compensation is disabled by default and can
be enabled for guests where this behaviour is acceptable.
---
hw/hpet.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 57 insertions(+), 1 deletions(-)
diff --git a/hw/hpet.c b/hw/hpet.c
index b5625fb..5a25a96 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -42,6 +42,9 @@
#define HPET_MSI_SUPPORT 0
+#define MAX_IRQS_TO_INJECT (uint32_t)5000
+#define MAX_IRQ_RATE (uint32_t)10
+
struct HPETState;
typedef struct HPETTimer { /* timers */
uint8_t tn; /*timer number*/
@@ -309,8 +312,11 @@ static const VMStateDescription vmstate_hpet = {
static void hpet_timer(void *opaque)
{
HPETTimer *t = opaque;
+ HPETState *s = t->state;
uint64_t diff;
+ int irq_delivered = 0;
+ uint32_t irq_count = 0;
uint64_t period = t->period;
uint64_t cur_tick = hpet_get_ticks(t->state);
@@ -318,13 +324,36 @@ static void hpet_timer(void *opaque)
if (t->config & HPET_TN_32BIT) {
while (hpet_time_after(cur_tick, t->cmp)) {
t->cmp = (uint32_t)(t->cmp + t->period);
+ irq_count++;
}
} else {
while (hpet_time_after64(cur_tick, t->cmp)) {
t->cmp += period;
+ irq_count++;
}
}
diff = hpet_calculate_diff(t, cur_tick);
+ if (s->driftfix) {
+ if (t->saved_period != t->period) {
+ uint64_t tmp = (t->irqs_to_inject * t->saved_period)
+ + t->ticks_not_accounted;
+ t->irqs_to_inject = tmp / t->period;
+ t->ticks_not_accounted = tmp % t->period;
+ t->saved_period = t->period;
+ }
+ t->irqs_to_inject += irq_count;
+ t->irqs_to_inject = MIN(t->irqs_to_inject, MAX_IRQS_TO_INJECT);
+ if (t->irqs_to_inject > 1) {
+ if (irq_count > 1) {
+ t->irq_rate++;
+ t->irq_rate = MIN(t->irq_rate, MAX_IRQ_RATE);
+ }
+ if (irq_count || t->divisor == 0) {
+ t->divisor = t->irq_rate;
+ }
+ diff /= t->divisor--;
+ }
+ }
Did I miss some change in the plan? I thought we were heading for a
generic, reusable driftfix tool box (or periodic timer service)? Or is
this intentionally an intermediate step?
Post by Ulrich Obergfell
qemu_mod_timer(t->qemu_timer,
qemu_get_clock(vm_clock) + (int64_t)ticks_to_ns(diff));
} else if (t->config & HPET_TN_32BIT && !timer_is_periodic(t)) {
@@ -335,7 +364,22 @@ static void hpet_timer(void *opaque)
t->wrap_flag = 0;
}
}
- update_irq(t, 1);
+ if (s->driftfix && timer_is_periodic(t) && period != 0) {
+ if (t->irqs_to_inject) {
+ irq_delivered = update_irq(t, 1);
+ if (irq_delivered) {
+ t->irq_rate = MIN(t->irq_rate, t->irqs_to_inject);
+ t->irqs_to_inject--;
+ } else {
+ if (irq_count) {
+ t->irq_rate++;
+ t->irq_rate = MIN(t->irq_rate, MAX_IRQ_RATE);
+ }
+ }
+ }
+ } else {
+ update_irq(t, 1);
+ }
}
static void hpet_set_timer(HPETTimer *t)
@@ -674,6 +718,12 @@ static void hpet_reset(DeviceState *d)
timer->config |= 0x00000004ULL << 32;
timer->period = 0ULL;
timer->wrap_flag = 0;
+
+ timer->saved_period = 0;
+ timer->ticks_not_accounted = 0;
+ timer->irqs_to_inject = 0;
+ timer->irq_rate = 1;
+ timer->divisor = 1;
}
s->hpet_counter = 0ULL;
@@ -734,6 +784,12 @@ static int hpet_init(SysBusDevice *dev)
timer->qemu_timer = qemu_new_timer(vm_clock, hpet_timer, timer);
timer->tn = i;
timer->state = s;
+
+ timer->saved_period = 0;
+ timer->ticks_not_accounted = 0;
+ timer->irqs_to_inject = 0;
+ timer->irq_rate = 1;
+ timer->divisor = 1;
We call reset after initializing devices, at least for cold-plugged ones.

Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Glauber Costa
2011-04-08 16:08:44 UTC
Permalink
Post by Jan Kiszka
Post by Ulrich Obergfell
+ }
Did I miss some change in the plan? I thought we were heading for a
generic, reusable driftfix tool box (or periodic timer service)? Or is
this intentionally an intermediate step?
Which is a medium to long way in the future. There is benefit of having
this early, since it can deliver real functionality - a reliable hpet,
and converting to whatever the api may look like in the future.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Jan Kiszka
2011-04-08 16:12:32 UTC
Permalink
Post by Glauber Costa
Post by Jan Kiszka
Post by Ulrich Obergfell
+ }
Did I miss some change in the plan? I thought we were heading for a
generic, reusable driftfix tool box (or periodic timer service)? Or is
this intentionally an intermediate step?
Which is a medium to long way in the future. There is benefit of having
this early, since it can deliver real functionality - a reliable hpet,
and converting to whatever the api may look like in the future.
Well, if the PIT and the RTC should use it as well, which is probably
rather a short term goal, we already have two more users.

Jan
--
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Ulrich Obergfell
2011-04-08 15:20:40 UTC
Permalink
Signed-off-by: Ulrich Obergfell <***@redhat.com>
---
hw/hpet.c | 14 ++++++++++++--
1 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/hpet.c b/hw/hpet.c
index 45847ed..c150da5 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -55,6 +55,11 @@ typedef struct HPETTimer { /* timers */
uint8_t wrap_flag; /* timer pop will indicate wrap for one-shot 32-bit
* mode. Next pop will be actual timer expiration.
*/
+ uint64_t saved_period;
+ uint64_t ticks_not_accounted;
+ uint32_t irqs_to_inject;
+ uint32_t irq_rate;
+ uint32_t divisor;
} HPETTimer;

typedef struct HPETState {
@@ -248,7 +253,7 @@ static int hpet_post_load(void *opaque, int version_id)

static const VMStateDescription vmstate_hpet_timer = {
.name = "hpet_timer",
- .version_id = 1,
+ .version_id = 3,
.minimum_version_id = 1,
.minimum_version_id_old = 1,
.fields = (VMStateField []) {
@@ -258,6 +263,11 @@ static const VMStateDescription vmstate_hpet_timer = {
VMSTATE_UINT64(fsb, HPETTimer),
VMSTATE_UINT64(period, HPETTimer),
VMSTATE_UINT8(wrap_flag, HPETTimer),
+ VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
+ VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
+ VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
+ VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
+ VMSTATE_UINT32_V(divisor, HPETTimer, 3),
VMSTATE_TIMER(qemu_timer, HPETTimer),
VMSTATE_END_OF_LIST()
}
@@ -265,7 +275,7 @@ static const VMStateDescription vmstate_hpet_timer = {

static const VMStateDescription vmstate_hpet = {
.name = "hpet",
- .version_id = 2,
+ .version_id = 3,
.minimum_version_id = 1,
.minimum_version_id_old = 1,
.pre_save = hpet_pre_save,
--
1.6.2.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Anthony Liguori
2011-04-08 15:47:38 UTC
Permalink
Post by Ulrich Obergfell
---
hw/hpet.c | 14 ++++++++++++--
1 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/hw/hpet.c b/hw/hpet.c
index 45847ed..c150da5 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -55,6 +55,11 @@ typedef struct HPETTimer { /* timers */
uint8_t wrap_flag; /* timer pop will indicate wrap for one-shot 32-bit
* mode. Next pop will be actual timer expiration.
*/
+ uint64_t saved_period;
+ uint64_t ticks_not_accounted;
+ uint32_t irqs_to_inject;
+ uint32_t irq_rate;
+ uint32_t divisor;
} HPETTimer;
typedef struct HPETState {
@@ -248,7 +253,7 @@ static int hpet_post_load(void *opaque, int version_id)
static const VMStateDescription vmstate_hpet_timer = {
.name = "hpet_timer",
- .version_id = 1,
+ .version_id = 3,
Why jump from 1 to 3?
Post by Ulrich Obergfell
.minimum_version_id = 1,
.minimum_version_id_old = 1,
.fields = (VMStateField []) {
@@ -258,6 +263,11 @@ static const VMStateDescription vmstate_hpet_timer = {
VMSTATE_UINT64(fsb, HPETTimer),
VMSTATE_UINT64(period, HPETTimer),
VMSTATE_UINT8(wrap_flag, HPETTimer),
+ VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
+ VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
+ VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
+ VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
+ VMSTATE_UINT32_V(divisor, HPETTimer, 3),
We ought to be able to use a subsection keyed off of whether any ticks
are currently accumulated, no?

Regards,

Anthony Liguori
Post by Ulrich Obergfell
VMSTATE_TIMER(qemu_timer, HPETTimer),
VMSTATE_END_OF_LIST()
}
@@ -265,7 +275,7 @@ static const VMStateDescription vmstate_hpet_timer = {
static const VMStateDescription vmstate_hpet = {
.name = "hpet",
- .version_id = 2,
+ .version_id = 3,
.minimum_version_id = 1,
.minimum_version_id_old = 1,
.pre_save = hpet_pre_save,
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Ulrich Obergfell
2011-04-11 08:24:04 UTC
Permalink
Post by Anthony Liguori
Post by Ulrich Obergfell
typedef struct HPETState {
@@ -248,7 +253,7 @@ static int hpet_post_load(void *opaque, int version_id)
static const VMStateDescription vmstate_hpet_timer = {
.name = "hpet_timer",
- .version_id = 1,
+ .version_id = 3,
Why jump from 1 to 3?
Post by Ulrich Obergfell
.minimum_version_id = 1,
.minimum_version_id_old = 1,
.fields = (VMStateField []) {
@@ -258,6 +263,11 @@ static const VMStateDescription
vmstate_hpet_timer = {
VMSTATE_UINT64(fsb, HPETTimer),
VMSTATE_UINT64(period, HPETTimer),
VMSTATE_UINT8(wrap_flag, HPETTimer),
+ VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
+ VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
+ VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
+ VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
+ VMSTATE_UINT32_V(divisor, HPETTimer, 3),
Anthony,

I incremented the version ID of 'vmstate_hpet' from 2 to 3 to make sure
that migrations from a QEMU process that is capable of 'driftfix' to a
QEMU process that is _not_ capable of 'driftfix' will fail. I assigned
version ID 3 to 'vmstate_hpet_timer' and to the new fields in there too
to indicate that adding those fields was the reason why the version ID
of 'vmstate_hpet' was incremented to 3.

As far as the flow of execution in vmstate_load_state() is concerned, I
think it does not matter whether the version ID of 'vmstate_hpet_timer'
and the new fields in there is 2 or 3 (as long as they are consistent).
When the 'while(field->name)' loop in vmstate_load_state() gets to the
following field in 'vmstate_hpet' ...

VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers, 0,
vmstate_hpet_timer, HPETTimer),

... it calls itself recursively ...

if (field->flags & VMS_STRUCT) {
ret = vmstate_load_state(f, field->vmsd, addr, field->vmsd->version_id);

'field->vmsd->version_id' is the version ID of 'vmstate_hpet_timer' [1].
Hence 'vmstate_hpet_timer.version_id' is being checked against itself ...

if (version_id > vmsd->version_id) {
return -EINVAL;
}

... and the version IDs of the new fields are also being checked against
'vmstate_hpet_timer.version_id' ...

if ((field->field_exists &&
field->field_exists(opaque, version_id)) ||
(!field->field_exists &&
field->version_id <= version_id)) {

If you want me to change the version ID of 'vmstate_hpet_timer' and the
new fields in there from 3 to 2, I can do that.


Regards,

Uli


[1] Ref.: commit fa3aad24d94a6cf894db52d83f72a399324a17bb
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Ulrich Obergfell
2011-04-11 09:06:33 UTC
Permalink
Post by Anthony Liguori
Post by Ulrich Obergfell
vmstate_hpet_timer = {
VMSTATE_UINT64(fsb, HPETTimer),
VMSTATE_UINT64(period, HPETTimer),
VMSTATE_UINT8(wrap_flag, HPETTimer),
+ VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
+ VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
+ VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
+ VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
+ VMSTATE_UINT32_V(divisor, HPETTimer, 3),
We ought to be able to use a subsection keyed off of whether any ticks
are currently accumulated, no?
Anthony,

I'm not sure if I understand your question correctly. Are you suggesting
to migrate the driftfix-related state conditionally / only if there are
any ticks accumulated in 'ticks_not_accounted' and 'irqs_to_inject' ?

The size of the driftfix-related state is 28 bytes per timer and we have
32 timers per HPETState, i.e. 896 additional bytes per HPETState. With a
maximum number of 8 HPET blocks (HPETState), this amounts to 7168 bytes.
Hence, unconditional migration of the driftfix-related state should not
cause significant additional overhead.

Maybe I missed something. Could you please explain which benefit you see
in using a subsection ?


Regards,

Uli
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Avi Kivity
2011-04-11 09:08:02 UTC
Permalink
Post by Ulrich Obergfell
Post by Anthony Liguori
Post by Ulrich Obergfell
vmstate_hpet_timer = {
VMSTATE_UINT64(fsb, HPETTimer),
VMSTATE_UINT64(period, HPETTimer),
VMSTATE_UINT8(wrap_flag, HPETTimer),
+ VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
+ VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
+ VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
+ VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
+ VMSTATE_UINT32_V(divisor, HPETTimer, 3),
We ought to be able to use a subsection keyed off of whether any ticks
are currently accumulated, no?
Anthony,
I'm not sure if I understand your question correctly. Are you suggesting
to migrate the driftfix-related state conditionally / only if there are
any ticks accumulated in 'ticks_not_accounted' and 'irqs_to_inject' ?
The size of the driftfix-related state is 28 bytes per timer and we have
32 timers per HPETState, i.e. 896 additional bytes per HPETState. With a
maximum number of 8 HPET blocks (HPETState), this amounts to 7168 bytes.
Hence, unconditional migration of the driftfix-related state should not
cause significant additional overhead.
It's not about overhead.
Post by Ulrich Obergfell
Maybe I missed something. Could you please explain which benefit you see
in using a subsection ?
In the common case of there being no drift, you can migrate from a qemu
that supports driftfix to a qemu that doesn't.
--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Anthony Liguori
2011-04-11 13:10:00 UTC
Permalink
Post by Avi Kivity
Post by Ulrich Obergfell
Post by Anthony Liguori
Post by Ulrich Obergfell
vmstate_hpet_timer = {
VMSTATE_UINT64(fsb, HPETTimer),
VMSTATE_UINT64(period, HPETTimer),
VMSTATE_UINT8(wrap_flag, HPETTimer),
+ VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
+ VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
+ VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
+ VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
+ VMSTATE_UINT32_V(divisor, HPETTimer, 3),
We ought to be able to use a subsection keyed off of whether any
ticks
Post by Anthony Liguori
are currently accumulated, no?
Anthony,
I'm not sure if I understand your question correctly. Are you suggesting
to migrate the driftfix-related state conditionally / only if there are
any ticks accumulated in 'ticks_not_accounted' and 'irqs_to_inject' ?
The size of the driftfix-related state is 28 bytes per timer and we have
32 timers per HPETState, i.e. 896 additional bytes per HPETState. With a
maximum number of 8 HPET blocks (HPETState), this amounts to 7168 bytes.
Hence, unconditional migration of the driftfix-related state should not
cause significant additional overhead.
It's not about overhead.
Post by Ulrich Obergfell
Maybe I missed something. Could you please explain which benefit you see
in using a subsection ?
In the common case of there being no drift, you can migrate from a
qemu that supports driftfix to a qemu that doesn't.
Right, subsections are a trick. The idea is that when you introduce new
state for a device model that is not always going to be set, when you do
the migration, you detect whether the state is set or not and if it's
not set, instead of sending empty versions of that state (i.e.
missed_ticks=0) you just don't send the new state at all.

This means that you can migrate to an older version of QEMU provided the
migration would work correctly.

Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Glauber Costa
2011-04-11 13:39:40 UTC
Permalink
Post by Anthony Liguori
Post by Avi Kivity
Post by Ulrich Obergfell
Post by Anthony Liguori
Post by Ulrich Obergfell
vmstate_hpet_timer = {
VMSTATE_UINT64(fsb, HPETTimer),
VMSTATE_UINT64(period, HPETTimer),
VMSTATE_UINT8(wrap_flag, HPETTimer),
+ VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
+ VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
+ VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
+ VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
+ VMSTATE_UINT32_V(divisor, HPETTimer, 3),
We ought to be able to use a subsection keyed off of whether any
ticks
Post by Anthony Liguori
are currently accumulated, no?
Anthony,
I'm not sure if I understand your question correctly. Are you suggesting
to migrate the driftfix-related state conditionally / only if there are
any ticks accumulated in 'ticks_not_accounted' and 'irqs_to_inject' ?
The size of the driftfix-related state is 28 bytes per timer and we have
32 timers per HPETState, i.e. 896 additional bytes per HPETState. With a
maximum number of 8 HPET blocks (HPETState), this amounts to 7168 bytes.
Hence, unconditional migration of the driftfix-related state should not
cause significant additional overhead.
It's not about overhead.
Post by Ulrich Obergfell
Maybe I missed something. Could you please explain which benefit you see
in using a subsection ?
In the common case of there being no drift, you can migrate from a
qemu that supports driftfix to a qemu that doesn't.
Right, subsections are a trick. The idea is that when you introduce new
state for a device model that is not always going to be set, when you do
the migration, you detect whether the state is set or not and if it's
not set, instead of sending empty versions of that state (i.e.
missed_ticks=0) you just don't send the new state at all.
This means that you can migrate to an older version of QEMU provided the
migration would work correctly.
Using subsections and testing for hpet option being disabled vs enabled,
is fine. But checking for the existence of drift, like you suggested (or
at least how I understood you), is very tricky. It is expected to change
many times during guest lifetime, and would make our migration
predictability something Heisenberg would be proud of.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Avi Kivity
2011-04-11 13:43:38 UTC
Permalink
Post by Glauber Costa
Post by Anthony Liguori
Post by Avi Kivity
Post by Ulrich Obergfell
Post by Anthony Liguori
Post by Ulrich Obergfell
vmstate_hpet_timer = {
VMSTATE_UINT64(fsb, HPETTimer),
VMSTATE_UINT64(period, HPETTimer),
VMSTATE_UINT8(wrap_flag, HPETTimer),
+ VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
+ VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
+ VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
+ VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
+ VMSTATE_UINT32_V(divisor, HPETTimer, 3),
We ought to be able to use a subsection keyed off of whether any
ticks
Post by Anthony Liguori
are currently accumulated, no?
Anthony,
I'm not sure if I understand your question correctly. Are you suggesting
to migrate the driftfix-related state conditionally / only if there are
any ticks accumulated in 'ticks_not_accounted' and 'irqs_to_inject' ?
The size of the driftfix-related state is 28 bytes per timer and we have
32 timers per HPETState, i.e. 896 additional bytes per HPETState. With a
maximum number of 8 HPET blocks (HPETState), this amounts to 7168 bytes.
Hence, unconditional migration of the driftfix-related state should not
cause significant additional overhead.
It's not about overhead.
Post by Ulrich Obergfell
Maybe I missed something. Could you please explain which benefit you see
in using a subsection ?
In the common case of there being no drift, you can migrate from a
qemu that supports driftfix to a qemu that doesn't.
Right, subsections are a trick. The idea is that when you introduce new
state for a device model that is not always going to be set, when you do
the migration, you detect whether the state is set or not and if it's
not set, instead of sending empty versions of that state (i.e.
missed_ticks=0) you just don't send the new state at all.
This means that you can migrate to an older version of QEMU provided the
migration would work correctly.
Using subsections and testing for hpet option being disabled vs enabled,
is fine. But checking for the existence of drift, like you suggested (or
at least how I understood you), is very tricky. It is expected to change
many times during guest lifetime, and would make our migration
predictability something Heisenberg would be proud of.
First, I'd expect no drift under normal circumstances, at least without
overcommit. We may also allow a small amount of drift to pass migration
(we lost time during the last phase anyway).

Second, the problem only occurs on new->old migrations.
--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Anthony Liguori
2011-04-11 13:47:37 UTC
Permalink
Post by Glauber Costa
Post by Anthony Liguori
Post by Avi Kivity
Post by Ulrich Obergfell
Post by Anthony Liguori
Post by Ulrich Obergfell
vmstate_hpet_timer = {
VMSTATE_UINT64(fsb, HPETTimer),
VMSTATE_UINT64(period, HPETTimer),
VMSTATE_UINT8(wrap_flag, HPETTimer),
+ VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
+ VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
+ VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
+ VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
+ VMSTATE_UINT32_V(divisor, HPETTimer, 3),
We ought to be able to use a subsection keyed off of whether any
ticks
Post by Anthony Liguori
are currently accumulated, no?
Anthony,
I'm not sure if I understand your question correctly. Are you suggesting
to migrate the driftfix-related state conditionally / only if there are
any ticks accumulated in 'ticks_not_accounted' and 'irqs_to_inject' ?
The size of the driftfix-related state is 28 bytes per timer and we have
32 timers per HPETState, i.e. 896 additional bytes per HPETState. With a
maximum number of 8 HPET blocks (HPETState), this amounts to 7168 bytes.
Hence, unconditional migration of the driftfix-related state should not
cause significant additional overhead.
It's not about overhead.
Post by Ulrich Obergfell
Maybe I missed something. Could you please explain which benefit you see
in using a subsection ?
In the common case of there being no drift, you can migrate from a
qemu that supports driftfix to a qemu that doesn't.
Right, subsections are a trick. The idea is that when you introduce new
state for a device model that is not always going to be set, when you do
the migration, you detect whether the state is set or not and if it's
not set, instead of sending empty versions of that state (i.e.
missed_ticks=0) you just don't send the new state at all.
This means that you can migrate to an older version of QEMU provided the
migration would work correctly.
Using subsections and testing for hpet option being disabled vs enabled,
is fine. But checking for the existence of drift, like you suggested (or
at least how I understood you), is very tricky. It is expected to change
many times during guest lifetime, and would make our migration
predictability something Heisenberg would be proud of.
Is this true? I would expect it to be very tied to workloads. For idle
workloads, you should never have accumulated missed ticks whereas with
heavy workloads, you always will have accumulated ticks.

Is that not correct?

Regards,

Anthony Liguori

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Glauber Costa
2011-04-11 13:57:48 UTC
Permalink
Post by Anthony Liguori
Post by Glauber Costa
Post by Anthony Liguori
Post by Avi Kivity
Post by Ulrich Obergfell
Post by Anthony Liguori
Post by Ulrich Obergfell
vmstate_hpet_timer = {
VMSTATE_UINT64(fsb, HPETTimer),
VMSTATE_UINT64(period, HPETTimer),
VMSTATE_UINT8(wrap_flag, HPETTimer),
+ VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
+ VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
+ VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
+ VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
+ VMSTATE_UINT32_V(divisor, HPETTimer, 3),
We ought to be able to use a subsection keyed off of whether any
ticks
Post by Anthony Liguori
are currently accumulated, no?
Anthony,
I'm not sure if I understand your question correctly. Are you suggesting
to migrate the driftfix-related state conditionally / only if there are
any ticks accumulated in 'ticks_not_accounted' and 'irqs_to_inject' ?
The size of the driftfix-related state is 28 bytes per timer and we have
32 timers per HPETState, i.e. 896 additional bytes per HPETState. With a
maximum number of 8 HPET blocks (HPETState), this amounts to 7168 bytes.
Hence, unconditional migration of the driftfix-related state should not
cause significant additional overhead.
It's not about overhead.
Post by Ulrich Obergfell
Maybe I missed something. Could you please explain which benefit you see
in using a subsection ?
In the common case of there being no drift, you can migrate from a
qemu that supports driftfix to a qemu that doesn't.
Right, subsections are a trick. The idea is that when you introduce new
state for a device model that is not always going to be set, when you do
the migration, you detect whether the state is set or not and if it's
not set, instead of sending empty versions of that state (i.e.
missed_ticks=0) you just don't send the new state at all.
This means that you can migrate to an older version of QEMU provided the
migration would work correctly.
Using subsections and testing for hpet option being disabled vs enabled,
is fine. But checking for the existence of drift, like you suggested (or
at least how I understood you), is very tricky. It is expected to change
many times during guest lifetime, and would make our migration
predictability something Heisenberg would be proud of.
Is this true? I would expect it to be very tied to workloads. For idle
workloads, you should never have accumulated missed ticks whereas with
heavy workloads, you always will have accumulated ticks.
Is that not correct?
Yes, it is , but we lose a lot of reliability by tying migration to the workload. Given that
we still have to start qemu the same way both sides, we end up with a
situation in which at time t, migration is possible, and at time t+1
migration is not.

I'd rather have subsections enabled at all times when the option to
allow driftfix is enabled.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Anthony Liguori
2011-04-11 13:10:52 UTC
Permalink
Post by Ulrich Obergfell
Post by Anthony Liguori
Post by Ulrich Obergfell
typedef struct HPETState {
@@ -248,7 +253,7 @@ static int hpet_post_load(void *opaque, int version_id)
static const VMStateDescription vmstate_hpet_timer = {
.name = "hpet_timer",
- .version_id = 1,
+ .version_id = 3,
Why jump from 1 to 3?
Post by Ulrich Obergfell
.minimum_version_id = 1,
.minimum_version_id_old = 1,
.fields = (VMStateField []) {
@@ -258,6 +263,11 @@ static const VMStateDescription
vmstate_hpet_timer = {
VMSTATE_UINT64(fsb, HPETTimer),
VMSTATE_UINT64(period, HPETTimer),
VMSTATE_UINT8(wrap_flag, HPETTimer),
+ VMSTATE_UINT64_V(saved_period, HPETTimer, 3),
+ VMSTATE_UINT64_V(ticks_not_accounted, HPETTimer, 3),
+ VMSTATE_UINT32_V(irqs_to_inject, HPETTimer, 3),
+ VMSTATE_UINT32_V(irq_rate, HPETTimer, 3),
+ VMSTATE_UINT32_V(divisor, HPETTimer, 3),
Anthony,
I incremented the version ID of 'vmstate_hpet' from 2 to 3 to make sure
that migrations from a QEMU process that is capable of 'driftfix' to a
QEMU process that is _not_ capable of 'driftfix' will fail. I assigned
version ID 3 to 'vmstate_hpet_timer' and to the new fields in there too
to indicate that adding those fields was the reason why the version ID
of 'vmstate_hpet' was incremented to 3.
As far as the flow of execution in vmstate_load_state() is concerned, I
think it does not matter whether the version ID of 'vmstate_hpet_timer'
and the new fields in there is 2 or 3 (as long as they are consistent).
When the 'while(field->name)' loop in vmstate_load_state() gets to the
following field in 'vmstate_hpet' ...
VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers, 0,
vmstate_hpet_timer, HPETTimer),
... it calls itself recursively ...
if (field->flags& VMS_STRUCT) {
ret = vmstate_load_state(f, field->vmsd, addr, field->vmsd->version_id);
'field->vmsd->version_id' is the version ID of 'vmstate_hpet_timer' [1].
Hence 'vmstate_hpet_timer.version_id' is being checked against itself ...
if (version_id> vmsd->version_id) {
return -EINVAL;
}
... and the version IDs of the new fields are also being checked against
'vmstate_hpet_timer.version_id' ...
if ((field->field_exists&&
field->field_exists(opaque, version_id)) ||
(!field->field_exists&&
field->version_id<= version_id)) {
If you want me to change the version ID of 'vmstate_hpet_timer' and the
new fields in there from 3 to 2, I can do that.
It avoids surprises so I think it's a reasonable thing to do. But yes,
your analysis is correct.

Regards,

Anthony Liguori
Post by Ulrich Obergfell
Regards,
Uli
[1] Ref.: commit fa3aad24d94a6cf894db52d83f72a399324a17bb
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Ulrich Obergfell
2011-04-08 15:20:41 UTC
Permalink
update_irq() uses a similar method as in 'rtc_td_hack' to detect
coalesced interrupts. The function entry addresses are retrieved
from 'target_get_irq_delivered' and 'target_reset_irq_delivered'.

Signed-off-by: Ulrich Obergfell <***@redhat.com>
---
hw/hpet.c | 15 +++++++++++++--
1 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/hw/hpet.c b/hw/hpet.c
index c150da5..b5625fb 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -31,6 +31,7 @@
#include "hpet_emul.h"
#include "sysbus.h"
#include "mc146818rtc.h"
+#include "sysemu.h"

//#define HPET_DEBUG
#ifdef HPET_DEBUG
@@ -176,11 +177,12 @@ static inline uint64_t hpet_calculate_diff(HPETTimer *t, uint64_t current)
}
}

-static void update_irq(struct HPETTimer *timer, int set)
+static int update_irq(struct HPETTimer *timer, int set)
{
uint64_t mask;
HPETState *s;
int route;
+ int irq_delivered = 1;

if (timer->tn <= 1 && hpet_in_legacy_mode(timer->state)) {
/* if LegacyReplacementRoute bit is set, HPET specification requires
@@ -205,8 +207,17 @@ static void update_irq(struct HPETTimer *timer, int set)
qemu_irq_raise(s->irqs[route]);
} else {
s->isr &= ~mask;
- qemu_irq_pulse(s->irqs[route]);
+ if (s->driftfix && target_get_irq_delivered
+ && target_reset_irq_delivered) {
+ target_reset_irq_delivered();
+ qemu_irq_raise(s->irqs[route]);
+ irq_delivered = target_get_irq_delivered();
+ qemu_irq_lower(s->irqs[route]);
+ } else {
+ qemu_irq_pulse(s->irqs[route]);
+ }
}
+ return irq_delivered;
}

static void hpet_pre_save(void *opaque)
--
1.6.2.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Loading...