aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGleb Smirnoff <glebius@FreeBSD.org>2016-10-25 16:45:55 +0000
committerGleb Smirnoff <glebius@FreeBSD.org>2016-10-25 16:45:55 +0000
commit1bd8be53ecd4c45f2224dfd03e7312ed57ac8d32 (patch)
treeed138ed2390abcb3994256f73b9f3d2c5b5d2557
parentb8dce58e29ea34abc3f23717b1bf7b71696a78e1 (diff)
downloadsrc-1bd8be53ecd4c45f2224dfd03e7312ed57ac8d32.tar.gz
src-1bd8be53ecd4c45f2224dfd03e7312ed57ac8d32.zip
EN-16:17: virtual memory issues.
Due to increased parallelism and optimizations in several parts of the system, the previously latent bugs in VM become much easier to trigger, affecting a significant number of the FreeBSD users. The exact technical details of the issues are provided in the commit messages of the merged revisions, which are listed below with short summaries. r301184 prevent parallel object collapses, fixes object lifecycle r301436 do not leak the vm object lock, fixes overcommit disable r302243 avoid the active object marking for vm.vmtotal sysctl, fixes "vodead" hangs r302513 vm_fault() race with the vm_object_collapse(), fixes spurious SIGSEGV r303291 postpone BO_DEAD, fixes panic on fast vnode reclaim Approved by: so
Notes
Notes: svn path=/releng/10.3/; revision=307929
-rw-r--r--sys/kern/vfs_subr.c8
-rw-r--r--sys/vm/vm_fault.c15
-rw-r--r--sys/vm/vm_meter.c82
-rw-r--r--sys/vm/vm_object.c10
-rw-r--r--sys/vm/vm_object.h1
5 files changed, 65 insertions, 51 deletions
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index aa8131306945..85af62ea6867 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -2934,7 +2934,13 @@ vgonel(struct vnode *vp)
TAILQ_EMPTY(&vp->v_bufobj.bo_clean.bv_hd) &&
vp->v_bufobj.bo_clean.bv_cnt == 0,
("vp %p bufobj not invalidated", vp));
- vp->v_bufobj.bo_flag |= BO_DEAD;
+
+ /*
+ * For VMIO bufobj, BO_DEAD is set in vm_object_terminate()
+ * after the object's page queue is flushed.
+ */
+ if (vp->v_bufobj.bo_object == NULL)
+ vp->v_bufobj.bo_flag |= BO_DEAD;
BO_UNLOCK(&vp->v_bufobj);
/*
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index b5ac58f35dfe..d65cda2ad0ed 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -286,7 +286,7 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
vm_prot_t prot;
long ahead, behind;
int alloc_req, era, faultcount, nera, reqpage, result;
- boolean_t growstack, is_first_object_locked, wired;
+ boolean_t dead, growstack, is_first_object_locked, wired;
int map_generation;
vm_object_t next_object;
vm_page_t marray[VM_FAULT_READ_MAX];
@@ -423,11 +423,18 @@ fast_failed:
fs.pindex = fs.first_pindex;
while (TRUE) {
/*
- * If the object is dead, we stop here
+ * If the object is marked for imminent termination,
+ * we retry here, since the collapse pass has raced
+ * with us. Otherwise, if we see terminally dead
+ * object, return fail.
*/
- if (fs.object->flags & OBJ_DEAD) {
+ if ((fs.object->flags & OBJ_DEAD) != 0) {
+ dead = fs.object->type == OBJT_DEAD;
unlock_and_deallocate(&fs);
- return (KERN_PROTECTION_FAILURE);
+ if (dead)
+ return (KERN_PROTECTION_FAILURE);
+ pause("vmf_de", 1);
+ goto RetryFault;
}
/*
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 75974c663997..3f8ba38fd911 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -93,29 +93,31 @@ SYSCTL_PROC(_vm, VM_LOADAVG, loadavg, CTLTYPE_STRUCT | CTLFLAG_RD |
CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_loadavg, "S,loadavg",
"Machine loadaverage history");
+/*
+ * This function aims to determine if the object is mapped,
+ * specifically, if it is referenced by a vm_map_entry. Because
+ * objects occasionally acquire transient references that do not
+ * represent a mapping, the method used here is inexact. However, it
+ * has very low overhead and is good enough for the advisory
+ * vm.vmtotal sysctl.
+ */
+static bool
+is_object_active(vm_object_t obj)
+{
+
+ return (obj->ref_count > obj->shadow_count);
+}
+
static int
vmtotal(SYSCTL_HANDLER_ARGS)
{
- struct proc *p;
struct vmtotal total;
- vm_map_entry_t entry;
vm_object_t object;
- vm_map_t map;
- int paging;
+ struct proc *p;
struct thread *td;
- struct vmspace *vm;
bzero(&total, sizeof(total));
- /*
- * Mark all objects as inactive.
- */
- mtx_lock(&vm_object_list_mtx);
- TAILQ_FOREACH(object, &vm_object_list, object_list) {
- VM_OBJECT_WLOCK(object);
- vm_object_clear_flag(object, OBJ_ACTIVE);
- VM_OBJECT_WUNLOCK(object);
- }
- mtx_unlock(&vm_object_list_mtx);
+
/*
* Calculate process statistics.
*/
@@ -136,11 +138,15 @@ vmtotal(SYSCTL_HANDLER_ARGS)
case TDS_INHIBITED:
if (TD_IS_SWAPPED(td))
total.t_sw++;
- else if (TD_IS_SLEEPING(td) &&
- td->td_priority <= PZERO)
- total.t_dw++;
- else
- total.t_sl++;
+ else if (TD_IS_SLEEPING(td)) {
+ if (td->td_priority <= PZERO)
+ total.t_dw++;
+ else
+ total.t_sl++;
+ if (td->td_wchan ==
+ &cnt.v_free_count)
+ total.t_pw++;
+ }
break;
case TDS_CAN_RUN:
@@ -158,29 +164,6 @@ vmtotal(SYSCTL_HANDLER_ARGS)
}
}
PROC_UNLOCK(p);
- /*
- * Note active objects.
- */
- paging = 0;
- vm = vmspace_acquire_ref(p);
- if (vm == NULL)
- continue;
- map = &vm->vm_map;
- vm_map_lock_read(map);
- for (entry = map->header.next;
- entry != &map->header; entry = entry->next) {
- if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
- (object = entry->object.vm_object) == NULL)
- continue;
- VM_OBJECT_WLOCK(object);
- vm_object_set_flag(object, OBJ_ACTIVE);
- paging |= object->paging_in_progress;
- VM_OBJECT_WUNLOCK(object);
- }
- vm_map_unlock_read(map);
- vmspace_free(vm);
- if (paging)
- total.t_pw++;
}
sx_sunlock(&allproc_lock);
/*
@@ -206,9 +189,18 @@ vmtotal(SYSCTL_HANDLER_ARGS)
*/
continue;
}
+ if (object->ref_count == 1 &&
+ (object->flags & OBJ_NOSPLIT) != 0) {
+ /*
+ * Also skip otherwise unreferenced swap
+ * objects backing tmpfs vnodes, and POSIX or
+ * SysV shared memory.
+ */
+ continue;
+ }
total.t_vm += object->size;
total.t_rm += object->resident_page_count;
- if (object->flags & OBJ_ACTIVE) {
+ if (is_object_active(object)) {
total.t_avm += object->size;
total.t_arm += object->resident_page_count;
}
@@ -216,7 +208,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
/* shared object */
total.t_vmshr += object->size;
total.t_rmshr += object->resident_page_count;
- if (object->flags & OBJ_ACTIVE) {
+ if (is_object_active(object)) {
total.t_avmshr += object->size;
total.t_armshr += object->resident_page_count;
}
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index e9d12806e543..847056f5323a 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -737,6 +737,10 @@ vm_object_terminate(vm_object_t object)
vinvalbuf(vp, V_SAVE, 0, 0);
+ BO_LOCK(&vp->v_bufobj);
+ vp->v_bufobj.bo_flag |= BO_DEAD;
+ BO_UNLOCK(&vp->v_bufobj);
+
VM_OBJECT_WLOCK(object);
}
@@ -1722,6 +1726,9 @@ vm_object_collapse(vm_object_t object)
* case.
*/
if (backing_object->ref_count == 1) {
+ vm_object_pip_add(object, 1);
+ vm_object_pip_add(backing_object, 1);
+
/*
* If there is exactly one reference to the backing
* object, we can collapse it into the parent.
@@ -1793,11 +1800,13 @@ vm_object_collapse(vm_object_t object)
KASSERT(backing_object->ref_count == 1, (
"backing_object %p was somehow re-referenced during collapse!",
backing_object));
+ vm_object_pip_wakeup(backing_object);
backing_object->type = OBJT_DEAD;
backing_object->ref_count = 0;
VM_OBJECT_WUNLOCK(backing_object);
vm_object_destroy(backing_object);
+ vm_object_pip_wakeup(object);
object_collapses++;
} else {
vm_object_t new_backing_object;
@@ -2130,6 +2139,7 @@ vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
*/
if (!reserved && !swap_reserve_by_cred(ptoa(next_size),
prev_object->cred)) {
+ VM_OBJECT_WUNLOCK(prev_object);
return (FALSE);
}
prev_object->charge += ptoa(next_size);
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index ac8feaecf3c5..f45aa9e01183 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -181,7 +181,6 @@ struct vm_object {
*/
#define OBJ_FICTITIOUS 0x0001 /* (c) contains fictitious pages */
#define OBJ_UNMANAGED 0x0002 /* (c) contains unmanaged pages */
-#define OBJ_ACTIVE 0x0004 /* active objects */
#define OBJ_DEAD 0x0008 /* dead objects (during rundown) */
#define OBJ_NOSPLIT 0x0010 /* dont split this object */
#define OBJ_PIPWNT 0x0040 /* paging in progress wanted */