add idl4k kernel firmware version 1.13.0.105

Jaroslav Kysela
2015-03-26 17:22:37 +01:00
parent 5194d2792e
commit e9070cdc77
31064 changed files with 12769984 additions and 0 deletions

6
kernel/arch/x86/vdso/.gitignore vendored Normal file

@@ -0,0 +1,6 @@
vdso.lds
vdso-syms.lds
vdso32-syms.lds
vdso32-syscall-syms.lds
vdso32-sysenter-syms.lds
vdso32-int80-syms.lds

140
kernel/arch/x86/vdso/Makefile Normal file

@@ -0,0 +1,140 @@
#
# Building vDSO images for x86.
#
VDSO64-$(CONFIG_X86_64) := y
VDSO32-$(CONFIG_X86_32) := y
VDSO32-$(CONFIG_COMPAT) := y
vdso-install-$(VDSO64-y) += vdso.so
vdso-install-$(VDSO32-y) += $(vdso32-images)
# files to link into the vdso
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
# files to link into kernel
obj-$(VDSO64-y) += vma.o vdso.o
obj-$(VDSO32-y) += vdso32.o vdso32-setup.o
vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
$(obj)/vdso.o: $(obj)/vdso.so
targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
export CPPFLAGS_vdso.lds += -P -C
VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
$(call if_changed,vdso)
$(obj)/%.so: OBJCOPYFLAGS := -S
$(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
$(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector)
$(vobjs): KBUILD_CFLAGS += $(CFL)
targets += vdso-syms.lds
obj-$(VDSO64-y) += vdso-syms.lds
#
# Match symbols in the DSO that look like VDSO*; produce a file of constants.
#
sed-vdsosym := -e 's/^00*/0/' \
-e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
quiet_cmd_vdsosym = VDSOSYM $@
define cmd_vdsosym
$(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
endef
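# e.g. an nm line such as "ffffffffff700000 A VDSO64_PRELINK" becomes
# "VDSO64_PRELINK = 0xffffffffff700000;" in the generated lds fragment.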
$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
$(call if_changed,vdsosym)
#
# Build multiple 32-bit vDSO images to choose from at boot time.
#
obj-$(VDSO32-y) += vdso32-syms.lds
vdso32.so-$(VDSO32-y) += int80
vdso32.so-$(CONFIG_COMPAT) += syscall
vdso32.so-$(VDSO32-y) += sysenter
vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
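# i.e. vdso32-int80.so and vdso32-sysenter.so, plus vdso32-syscall.so
# when CONFIG_COMPAT is enabled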
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1
# This makes sure the $(obj) subdirectory exists even though vdso32/
# is not a kbuild sub-make subdirectory.
override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
targets += vdso32/vdso32.lds
targets += $(vdso32-images) $(vdso32-images:=.dbg)
targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o)
extra-y += $(vdso32-images)
$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
$(obj)/vdso32/vdso32.lds \
$(obj)/vdso32/note.o \
$(obj)/vdso32/%.o
$(call if_changed,vdso)
# Make vdso32-*-syms.lds from each image, and then make sure they match.
# The only difference should be that some do not define VDSO32_SYSENTER_RETURN.
targets += vdso32-syms.lds $(vdso32.so-y:%=vdso32-%-syms.lds)
quiet_cmd_vdso32sym = VDSOSYM $@
define cmd_vdso32sym
if LC_ALL=C sort -u $(filter-out FORCE,$^) > $(@D)/.tmp_$(@F) && \
$(foreach H,$(filter-out FORCE,$^),\
if grep -q VDSO32_SYSENTER_RETURN $H; \
then diff -u $(@D)/.tmp_$(@F) $H; \
else sed /VDSO32_SYSENTER_RETURN/d $(@D)/.tmp_$(@F) | \
diff -u - $H; fi &&) : ;\
then mv -f $(@D)/.tmp_$(@F) $@; \
else rm -f $(@D)/.tmp_$(@F); exit 1; \
fi
endef
$(obj)/vdso32-syms.lds: $(vdso32.so-y:%=$(obj)/vdso32-%-syms.lds) FORCE
$(call if_changed,vdso32sym)
#
# The DSO images are built using a special linker script.
#
quiet_cmd_vdso = VDSO $@
cmd_vdso = $(CC) -nostdlib -o $@ \
$(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
-Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
GCOV_PROFILE := n
#
# Install the unstripped copy of vdso*.so listed in $(vdso-install-y).
#
quiet_cmd_vdso_install = INSTALL $@
cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE
@mkdir -p $(MODLIB)/vdso
$(call cmd,vdso_install)
PHONY += vdso_install $(vdso-install-y)
vdso_install: $(vdso-install-y)
clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80*

159
kernel/arch/x86/vdso/vclock_gettime.c Normal file

@@ -0,0 +1,159 @@
/*
* Copyright 2006 Andi Kleen, SUSE Labs.
* Subject to the GNU Public License, v.2
*
* Fast user context implementation of clock_gettime and gettimeofday.
*
* The code should have no internal unresolved relocations.
* Check with readelf after changing.
* Also alternative() doesn't work.
*/
/* Disable profiling for userspace code: */
#define DISABLE_BRANCH_PROFILING
#include <linux/kernel.h>
#include <linux/posix-timers.h>
#include <linux/time.h>
#include <linux/string.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include <asm/timex.h>
#include <asm/hpet.h>
#include <asm/unistd.h>
#include <asm/io.h>
#include "vextern.h"
#define gtod vdso_vsyscall_gtod_data
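/*
 * Fall back to the real syscall when the fast path is unavailable.
 * Per the x86-64 syscall ABI: rax = syscall number, rdi = clock,
 * rsi = ts (matching the "a", "D" and "S" constraints below).
 */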
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
asm("syscall" : "=a" (ret) :
"0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory");
return ret;
}
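/*
 * Nanoseconds since the last timekeeper update, computed from the
 * clocksource delta: ns = (cycles * mult) >> shift.
 */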
notrace static inline long vgetns(void)
{
long v;
cycles_t (*vread)(void);
vread = gtod->clock.vread;
v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask;
return (v * gtod->clock.mult) >> gtod->clock.shift;
}
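/*
 * Snapshot the wall time under the gtod seqlock and retry if the
 * kernel updated the timekeeping data concurrently.
 */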
notrace static noinline int do_realtime(struct timespec *ts)
{
unsigned long seq, ns;
do {
seq = read_seqbegin(&gtod->lock);
ts->tv_sec = gtod->wall_time_sec;
ts->tv_nsec = gtod->wall_time_nsec;
ns = vgetns();
} while (unlikely(read_seqretry(&gtod->lock, seq)));
timespec_add_ns(ts, ns);
return 0;
}
/* Copy of the version in kernel/time.c which we cannot directly access */
notrace static void
vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
{
while (nsec >= NSEC_PER_SEC) {
nsec -= NSEC_PER_SEC;
++sec;
}
while (nsec < 0) {
nsec += NSEC_PER_SEC;
--sec;
}
ts->tv_sec = sec;
ts->tv_nsec = nsec;
}
notrace static noinline int do_monotonic(struct timespec *ts)
{
unsigned long seq, ns, secs;
do {
seq = read_seqbegin(&gtod->lock);
secs = gtod->wall_time_sec;
ns = gtod->wall_time_nsec + vgetns();
secs += gtod->wall_to_monotonic.tv_sec;
ns += gtod->wall_to_monotonic.tv_nsec;
} while (unlikely(read_seqretry(&gtod->lock, seq)));
vset_normalized_timespec(ts, secs, ns);
return 0;
}
notrace static noinline int do_realtime_coarse(struct timespec *ts)
{
unsigned long seq;
do {
seq = read_seqbegin(&gtod->lock);
ts->tv_sec = gtod->wall_time_coarse.tv_sec;
ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
} while (unlikely(read_seqretry(&gtod->lock, seq)));
return 0;
}
notrace static noinline int do_monotonic_coarse(struct timespec *ts)
{
unsigned long seq, ns, secs;
do {
seq = read_seqbegin(&gtod->lock);
secs = gtod->wall_time_coarse.tv_sec;
ns = gtod->wall_time_coarse.tv_nsec;
secs += gtod->wall_to_monotonic.tv_sec;
ns += gtod->wall_to_monotonic.tv_nsec;
} while (unlikely(read_seqretry(&gtod->lock, seq)));
vset_normalized_timespec(ts, secs, ns);
return 0;
}
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
if (likely(gtod->sysctl_enabled))
switch (clock) {
case CLOCK_REALTIME:
if (likely(gtod->clock.vread))
return do_realtime(ts);
break;
case CLOCK_MONOTONIC:
if (likely(gtod->clock.vread))
return do_monotonic(ts);
break;
case CLOCK_REALTIME_COARSE:
return do_realtime_coarse(ts);
case CLOCK_MONOTONIC_COARSE:
return do_monotonic_coarse(ts);
}
return vdso_fallback_gettime(clock, ts);
}
int clock_gettime(clockid_t, struct timespec *)
__attribute__((weak, alias("__vdso_clock_gettime")));
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
long ret;
if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
if (likely(tv != NULL)) {
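/*
 * struct timeval has the same layout as struct timespec (checked
 * by the BUILD_BUG_ON below), so fill it in as a timespec and then
 * convert the nanoseconds field to microseconds in place.
 */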
BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
offsetof(struct timespec, tv_nsec) ||
sizeof(*tv) != sizeof(struct timespec));
do_realtime((struct timespec *)tv);
tv->tv_usec /= 1000;
}
if (unlikely(tz != NULL)) {
/* Avoid memcpy. Some old compilers fail to inline it */
tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
}
return 0;
}
asm("syscall" : "=a" (ret) :
"0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
return ret;
}
int gettimeofday(struct timeval *, struct timezone *)
__attribute__((weak, alias("__vdso_gettimeofday")));

64
kernel/arch/x86/vdso/vdso-layout.lds.S Normal file

@@ -0,0 +1,64 @@
/*
* Linker script for vDSO. This is an ELF shared object prelinked to
* its virtual address, and with only one read-only segment.
* This script controls its layout.
*/
SECTIONS
{
. = VDSO_PRELINK + SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.note : { *(.note.*) } :text :note
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
.eh_frame : { KEEP (*(.eh_frame)) } :text
.dynamic : { *(.dynamic) } :text :dynamic
.rodata : { *(.rodata*) } :text
.data : {
*(.data*)
*(.sdata*)
*(.got.plt) *(.got)
*(.gnu.linkonce.d.*)
*(.bss*)
*(.dynbss*)
*(.gnu.linkonce.b.*)
}
.altinstructions : { *(.altinstructions) }
.altinstr_replacement : { *(.altinstr_replacement) }
/*
* Align the actual code well away from the non-instruction data.
* This is the best thing for the I-cache.
*/
. = ALIGN(0x100);
.text : { *(.text*) } :text =0x90909090
}
/*
* Very old versions of ld do not recognize this name token; use the constant.
*/
#define PT_GNU_EH_FRAME 0x6474e550
/*
* We must supply the ELF program headers explicitly to get just one
* PT_LOAD segment, and set the flags explicitly to make segments read-only.
*/
PHDRS
{
text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
note PT_NOTE FLAGS(4); /* PF_R */
eh_frame_hdr PT_GNU_EH_FRAME;
}

12
kernel/arch/x86/vdso/vdso-note.S Normal file

@@ -0,0 +1,12 @@
/*
* This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
* Here we can supply some information useful to userland.
*/
#include <linux/uts.h>
#include <linux/version.h>
#include <linux/elfnote.h>
ELFNOTE_START(Linux, 0, "a")
.long LINUX_VERSION_CODE
ELFNOTE_END

10
kernel/arch/x86/vdso/vdso.S Normal file

@@ -0,0 +1,10 @@
#include <linux/init.h>
__INITDATA
.globl vdso_start, vdso_end
vdso_start:
.incbin "arch/x86/vdso/vdso.so"
vdso_end:
__FINIT

37
kernel/arch/x86/vdso/vdso.lds.S Normal file

@@ -0,0 +1,37 @@
/*
* Linker script for 64-bit vDSO.
* We #include the file to define the layout details.
* Here we only choose the prelinked virtual address.
*
* This file defines the version script giving the user-exported symbols in
* the DSO. We can define local symbols here called VDSO* to make their
* values visible using the asm-x86/vdso.h macros from the kernel proper.
*/
#define VDSO_PRELINK 0xffffffffff700000
#include "vdso-layout.lds.S"
/*
* This controls what userland symbols we export from the vDSO.
*/
VERSION {
LINUX_2.6 {
global:
clock_gettime;
__vdso_clock_gettime;
gettimeofday;
__vdso_gettimeofday;
getcpu;
__vdso_getcpu;
local: *;
};
}
VDSO64_PRELINK = VDSO_PRELINK;
/*
* Define VDSO64_x for each VEXTERN(x), for use via VDSO64_SYMBOL.
*/
#define VEXTERN(x) VDSO64_ ## x = vdso_ ## x;
#include "vextern.h"
#undef VEXTERN
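/* e.g. VEXTERN(jiffies) expands here to: VDSO64_jiffies = vdso_jiffies; */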

443
kernel/arch/x86/vdso/vdso32-setup.c Normal file

@@ -0,0 +1,443 @@
/*
* (C) Copyright 2002 Linus Torvalds
* Portions based on the vdso-randomization code from exec-shield:
* Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
*
* This file contains the needed initializations to support sysenter.
*/
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/thread_info.h>
#include <linux/sched.h>
#include <linux/gfp.h>
#include <linux/string.h>
#include <linux/elf.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>
#include <asm/cpufeature.h>
#include <asm/msr.h>
#include <asm/pgtable.h>
#include <asm/unistd.h>
#include <asm/elf.h>
#include <asm/tlbflush.h>
#include <asm/vdso.h>
#include <asm/proto.h>
enum {
VDSO_DISABLED = 0,
VDSO_ENABLED = 1,
VDSO_COMPAT = 2,
};
#ifdef CONFIG_COMPAT_VDSO
#define VDSO_DEFAULT VDSO_COMPAT
#else
#define VDSO_DEFAULT VDSO_ENABLED
#endif
#ifdef CONFIG_X86_64
#define vdso_enabled sysctl_vsyscall32
#define arch_setup_additional_pages syscall32_setup_pages
#endif
/*
* This is the difference between the prelinked addresses in the vDSO images
* and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO
* in the user address space.
*/
#define VDSO_ADDR_ADJUST (VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK)
/*
* Should the kernel map a VDSO page into processes and pass its
* address down to glibc upon exec()?
*/
unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;
static int __init vdso_setup(char *s)
{
vdso_enabled = simple_strtoul(s, NULL, 0);
return 1;
}
/*
* For consistency, the argument vdso32=[012] affects the 32-bit vDSO
* behavior on both 64-bit and 32-bit kernels.
* On 32-bit kernels, vdso=[012] means the same thing.
*/
__setup("vdso32=", vdso_setup);
#ifdef CONFIG_X86_32
__setup_param("vdso=", vdso32_setup, vdso_setup, 0);
EXPORT_SYMBOL_GPL(vdso_enabled);
#endif
static __init void reloc_symtab(Elf32_Ehdr *ehdr,
unsigned offset, unsigned size)
{
Elf32_Sym *sym = (void *)ehdr + offset;
unsigned nsym = size / sizeof(*sym);
unsigned i;
for(i = 0; i < nsym; i++, sym++) {
if (sym->st_shndx == SHN_UNDEF ||
sym->st_shndx == SHN_ABS)
continue; /* skip */
if (sym->st_shndx > SHN_LORESERVE) {
printk(KERN_INFO "VDSO: unexpected st_shndx %x\n",
sym->st_shndx);
continue;
}
switch(ELF_ST_TYPE(sym->st_info)) {
case STT_OBJECT:
case STT_FUNC:
case STT_SECTION:
case STT_FILE:
sym->st_value += VDSO_ADDR_ADJUST;
}
}
}
static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
{
Elf32_Dyn *dyn = (void *)ehdr + offset;
for(; dyn->d_tag != DT_NULL; dyn++)
switch(dyn->d_tag) {
case DT_PLTGOT:
case DT_HASH:
case DT_STRTAB:
case DT_SYMTAB:
case DT_RELA:
case DT_INIT:
case DT_FINI:
case DT_REL:
case DT_DEBUG:
case DT_JMPREL:
case DT_VERSYM:
case DT_VERDEF:
case DT_VERNEED:
case DT_ADDRRNGLO ... DT_ADDRRNGHI:
/* definitely pointers needing relocation */
dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
break;
case DT_ENCODING ... OLD_DT_LOOS-1:
case DT_LOOS ... DT_HIOS-1:
/* Tags above DT_ENCODING are pointers if
they're even */
if (dyn->d_tag >= DT_ENCODING &&
(dyn->d_tag & 1) == 0)
dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
break;
case DT_VERDEFNUM:
case DT_VERNEEDNUM:
case DT_FLAGS_1:
case DT_RELACOUNT:
case DT_RELCOUNT:
case DT_VALRNGLO ... DT_VALRNGHI:
/* definitely not pointers */
break;
case OLD_DT_LOOS ... DT_LOOS-1:
case DT_HIOS ... DT_VALRNGLO-1:
default:
if (dyn->d_tag > DT_ENCODING)
printk(KERN_INFO "VDSO: unexpected DT_tag %x\n",
dyn->d_tag);
break;
}
}
static __init void relocate_vdso(Elf32_Ehdr *ehdr)
{
Elf32_Phdr *phdr;
Elf32_Shdr *shdr;
int i;
BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
!elf_check_arch_ia32(ehdr) ||
ehdr->e_type != ET_DYN);
ehdr->e_entry += VDSO_ADDR_ADJUST;
/* rebase phdrs */
phdr = (void *)ehdr + ehdr->e_phoff;
for (i = 0; i < ehdr->e_phnum; i++) {
phdr[i].p_vaddr += VDSO_ADDR_ADJUST;
/* relocate dynamic stuff */
if (phdr[i].p_type == PT_DYNAMIC)
reloc_dyn(ehdr, phdr[i].p_offset);
}
/* rebase sections */
shdr = (void *)ehdr + ehdr->e_shoff;
for(i = 0; i < ehdr->e_shnum; i++) {
if (!(shdr[i].sh_flags & SHF_ALLOC))
continue;
shdr[i].sh_addr += VDSO_ADDR_ADJUST;
if (shdr[i].sh_type == SHT_SYMTAB ||
shdr[i].sh_type == SHT_DYNSYM)
reloc_symtab(ehdr, shdr[i].sh_offset,
shdr[i].sh_size);
}
}
static struct page *vdso32_pages[1];
#ifdef CONFIG_X86_64
#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32))
#define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32))
/* May not be __init: called during resume */
void syscall32_cpu_init(void)
{
/* Load these always in case some future AMD CPU supports
SYSENTER from compat mode too. */
checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
wrmsrl(MSR_CSTAR, ia32_cstar_target);
}
#define compat_uses_vma 1
static inline void map_compat_vdso(int map)
{
}
#else /* CONFIG_X86_32 */
#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
#define vdso32_syscall() (0)
void enable_sep_cpu(void)
{
int cpu = get_cpu();
struct tss_struct *tss = &per_cpu(init_tss, cpu);
if (!boot_cpu_has(X86_FEATURE_SEP)) {
put_cpu();
return;
}
tss->x86_tss.ss1 = __KERNEL_CS;
tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
put_cpu();
}
static struct vm_area_struct gate_vma;
static int __init gate_vma_init(void)
{
gate_vma.vm_mm = NULL;
gate_vma.vm_start = FIXADDR_USER_START;
gate_vma.vm_end = FIXADDR_USER_END;
gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
gate_vma.vm_page_prot = __P101;
/*
* Make sure the vDSO gets into every core dump.
* Dumping its contents makes post-mortem fully interpretable later
* without matching up the same kernel and hardware config to see
* what PC values meant.
*/
gate_vma.vm_flags |= VM_ALWAYSDUMP;
return 0;
}
#define compat_uses_vma 0
static void map_compat_vdso(int map)
{
static int vdso_mapped;
if (map == vdso_mapped)
return;
vdso_mapped = map;
__set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT,
map ? PAGE_READONLY_EXEC : PAGE_NONE);
/* flush stray tlbs */
flush_tlb_all();
}
#endif /* CONFIG_X86_64 */
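/*
 * Copy the most suitable vsyscall flavor for this CPU into the
 * shared vdso32 page: syscall, sysenter, or the int80 fallback.
 */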
int __init sysenter_setup(void)
{
void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
const void *vsyscall;
size_t vsyscall_len;
vdso32_pages[0] = virt_to_page(syscall_page);
#ifdef CONFIG_X86_32
gate_vma_init();
#endif
if (vdso32_syscall()) {
vsyscall = &vdso32_syscall_start;
vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start;
} else if (vdso32_sysenter()){
vsyscall = &vdso32_sysenter_start;
vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start;
} else {
vsyscall = &vdso32_int80_start;
vsyscall_len = &vdso32_int80_end - &vdso32_int80_start;
}
memcpy(syscall_page, vsyscall, vsyscall_len);
relocate_vdso(syscall_page);
return 0;
}
/* Setup a VMA at program startup for the vsyscall page */
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
unsigned long addr;
int ret = 0;
bool compat;
if (vdso_enabled == VDSO_DISABLED)
return 0;
down_write(&mm->mmap_sem);
/* Test compat mode once here, in case someone
changes it via sysctl */
compat = (vdso_enabled == VDSO_COMPAT);
map_compat_vdso(compat);
if (compat)
addr = VDSO_HIGH_BASE;
else {
addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
}
}
current->mm->context.vdso = (void *)addr;
if (compat_uses_vma || !compat) {
/*
* MAYWRITE to allow gdb to COW and set breakpoints
*
* Make sure the vDSO gets into every core dump.
* Dumping its contents makes post-mortem fully
* interpretable later without matching up the same
* kernel and hardware config to see what PC values
* meant.
*/
ret = install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
VM_ALWAYSDUMP,
vdso32_pages);
if (ret)
goto up_fail;
}
current_thread_info()->sysenter_return =
VDSO32_SYMBOL(addr, SYSENTER_RETURN);
up_fail:
if (ret)
current->mm->context.vdso = NULL;
up_write(&mm->mmap_sem);
return ret;
}
#ifdef CONFIG_X86_64
__initcall(sysenter_setup);
#ifdef CONFIG_SYSCTL
/* Register vsyscall32 into the ABI table */
#include <linux/sysctl.h>
static ctl_table abi_table2[] = {
{
.procname = "vsyscall32",
.data = &sysctl_vsyscall32,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{}
};
static ctl_table abi_root_table2[] = {
{
.ctl_name = CTL_ABI,
.procname = "abi",
.mode = 0555,
.child = abi_table2
},
{}
};
static __init int ia32_binfmt_init(void)
{
register_sysctl_table(abi_root_table2);
return 0;
}
__initcall(ia32_binfmt_init);
#endif
#else /* CONFIG_X86_32 */
const char *arch_vma_name(struct vm_area_struct *vma)
{
if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
return "[vdso]";
return NULL;
}
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
struct mm_struct *mm = tsk->mm;
/* Check to see if this task was created in compat vdso mode */
if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
return &gate_vma;
return NULL;
}
int in_gate_area(struct task_struct *task, unsigned long addr)
{
const struct vm_area_struct *vma = get_gate_vma(task);
return vma && addr >= vma->vm_start && addr < vma->vm_end;
}
int in_gate_area_no_task(unsigned long addr)
{
return 0;
}
#endif /* CONFIG_X86_64 */

22
kernel/arch/x86/vdso/vdso32.S Normal file

@@ -0,0 +1,22 @@
#include <linux/init.h>
__INITDATA
.globl vdso32_int80_start, vdso32_int80_end
vdso32_int80_start:
.incbin "arch/x86/vdso/vdso32-int80.so"
vdso32_int80_end:
.globl vdso32_syscall_start, vdso32_syscall_end
vdso32_syscall_start:
#ifdef CONFIG_COMPAT
.incbin "arch/x86/vdso/vdso32-syscall.so"
#endif
vdso32_syscall_end:
.globl vdso32_sysenter_start, vdso32_sysenter_end
vdso32_sysenter_start:
.incbin "arch/x86/vdso/vdso32-sysenter.so"
vdso32_sysenter_end:
__FINIT

1
kernel/arch/x86/vdso/vdso32/.gitignore vendored Normal file

@@ -0,0 +1 @@
vdso32.lds

56
kernel/arch/x86/vdso/vdso32/int80.S Normal file

@@ -0,0 +1,56 @@
/*
* Code for the vDSO. This version uses the old int $0x80 method.
*
* First get the common code for the sigreturn entry points.
* This must come first.
*/
#include "sigreturn.S"
.text
.globl __kernel_vsyscall
.type __kernel_vsyscall,@function
ALIGN
__kernel_vsyscall:
.LSTART_vsyscall:
int $0x80
ret
.LEND_vsyscall:
.size __kernel_vsyscall,.-.LSTART_vsyscall
.previous
.section .eh_frame,"a",@progbits
.LSTARTFRAMEDLSI:
.long .LENDCIEDLSI-.LSTARTCIEDLSI
.LSTARTCIEDLSI:
.long 0 /* CIE ID */
.byte 1 /* Version number */
.string "zR" /* NUL-terminated augmentation string */
.uleb128 1 /* Code alignment factor */
.sleb128 -4 /* Data alignment factor */
.byte 8 /* Return address register column */
.uleb128 1 /* Augmentation value length */
.byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
.byte 0x0c /* DW_CFA_def_cfa */
.uleb128 4
.uleb128 4
.byte 0x88 /* DW_CFA_offset, column 0x8 */
.uleb128 1
.align 4
.LENDCIEDLSI:
.long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
.LSTARTFDEDLSI:
.long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
.long .LSTART_vsyscall-. /* PC-relative start address */
.long .LEND_vsyscall-.LSTART_vsyscall
.uleb128 0
.align 4
.LENDFDEDLSI:
.previous
/*
* Pad out the segment to match the size of the sysenter.S version.
*/
VDSO32_vsyscall_eh_frame_size = 0x40
.section .data,"aw",@progbits
.space VDSO32_vsyscall_eh_frame_size-(.LENDFDEDLSI-.LSTARTFRAMEDLSI), 0
.previous

44
kernel/arch/x86/vdso/vdso32/note.S Normal file

@@ -0,0 +1,44 @@
/*
* This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
* Here we can supply some information useful to userland.
*/
#include <linux/version.h>
#include <linux/elfnote.h>
/* Ideally this would use UTS_NAME, but using a quoted string here
doesn't work. Remember to change this when changing the
kernel's name. */
ELFNOTE_START(Linux, 0, "a")
.long LINUX_VERSION_CODE
ELFNOTE_END
#ifdef CONFIG_XEN
/*
* Add a special note telling glibc's dynamic linker a fake hardware
* flavor that it will use to choose the search path for libraries in the
* same way it uses real hardware capabilities like "mmx".
* We supply "nosegneg" as the fake capability, to indicate that we
* do not like negative offsets in instructions using segment overrides,
* since we implement those inefficiently. This makes it possible to
* install libraries optimized to avoid those access patterns in someplace
* like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/file
* corresponding to the bits here is needed to make ldconfig work right.
* It should contain:
* hwcap 1 nosegneg
* to match the mapping of bit to name that we give here.
*
* At runtime, the fake hardware feature will be considered to be present
* if its bit is set in the mask word. So, we start with the mask 0, and
* at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen.
*/
#include "../../xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */
ELFNOTE_START(GNU, 2, "a")
.long 1 /* ncaps */
VDSO32_NOTE_MASK: /* Symbol used by arch/x86/xen/setup.c */
.long 0 /* mask */
.byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */
ELFNOTE_END
#endif

144
kernel/arch/x86/vdso/vdso32/sigreturn.S Normal file

@@ -0,0 +1,144 @@
/*
* Common code for the sigreturn entry points in vDSO images.
* So far this code is the same for both int80 and sysenter versions.
* This file is #include'd by int80.S et al to define them first thing.
* The kernel assumes that the addresses of these routines are constant
* for all vDSO implementations.
*/
#include <linux/linkage.h>
#include <asm/unistd_32.h>
#include <asm/asm-offsets.h>
#ifndef SYSCALL_ENTER_KERNEL
#define SYSCALL_ENTER_KERNEL int $0x80
#endif
.text
.globl __kernel_sigreturn
.type __kernel_sigreturn,@function
ALIGN
__kernel_sigreturn:
.LSTART_sigreturn:
popl %eax /* XXX does this mean it needs unwind info? */
movl $__NR_sigreturn, %eax
SYSCALL_ENTER_KERNEL
.LEND_sigreturn:
nop
.size __kernel_sigreturn,.-.LSTART_sigreturn
.globl __kernel_rt_sigreturn
.type __kernel_rt_sigreturn,@function
ALIGN
__kernel_rt_sigreturn:
.LSTART_rt_sigreturn:
movl $__NR_rt_sigreturn, %eax
SYSCALL_ENTER_KERNEL
.LEND_rt_sigreturn:
nop
.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
.previous
.section .eh_frame,"a",@progbits
.LSTARTFRAMEDLSI1:
.long .LENDCIEDLSI1-.LSTARTCIEDLSI1
.LSTARTCIEDLSI1:
.long 0 /* CIE ID */
.byte 1 /* Version number */
.string "zRS" /* NUL-terminated augmentation string */
.uleb128 1 /* Code alignment factor */
.sleb128 -4 /* Data alignment factor */
.byte 8 /* Return address register column */
.uleb128 1 /* Augmentation value length */
.byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
.byte 0 /* DW_CFA_nop */
.align 4
.LENDCIEDLSI1:
.long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */
.LSTARTFDEDLSI1:
.long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */
/* HACK: The dwarf2 unwind routines will subtract 1 from the
return address to get an address in the middle of the
presumed call instruction. Since we didn't get here via
a call, we need to include the nop before the real start
to make up for it. */
.long .LSTART_sigreturn-1-. /* PC-relative start address */
.long .LEND_sigreturn-.LSTART_sigreturn+1
.uleb128 0 /* Augmentation */
/* What follows are the instructions for the table generation.
We record the locations of each register saved. This is
complicated by the fact that the "CFA" is always assumed to
be the value of the stack pointer in the caller. This means
that we must define the CFA of this body of code to be the
saved value of the stack pointer in the sigcontext. Which
also means that there is no fixed relation to the other
saved registers, which means that we must use DW_CFA_expression
to compute their addresses. It also means that when we
adjust the stack with the popl, we have to do it all over again. */
#define do_cfa_expr(offset) \
.byte 0x0f; /* DW_CFA_def_cfa_expression */ \
.uleb128 1f-0f; /* length */ \
0: .byte 0x74; /* DW_OP_breg4 */ \
.sleb128 offset; /* offset */ \
.byte 0x06; /* DW_OP_deref */ \
1:
#define do_expr(regno, offset) \
.byte 0x10; /* DW_CFA_expression */ \
.uleb128 regno; /* regno */ \
.uleb128 1f-0f; /* length */ \
0: .byte 0x74; /* DW_OP_breg4 */ \
.sleb128 offset; /* offset */ \
1:
do_cfa_expr(IA32_SIGCONTEXT_sp+4)
do_expr(0, IA32_SIGCONTEXT_ax+4)
do_expr(1, IA32_SIGCONTEXT_cx+4)
do_expr(2, IA32_SIGCONTEXT_dx+4)
do_expr(3, IA32_SIGCONTEXT_bx+4)
do_expr(5, IA32_SIGCONTEXT_bp+4)
do_expr(6, IA32_SIGCONTEXT_si+4)
do_expr(7, IA32_SIGCONTEXT_di+4)
do_expr(8, IA32_SIGCONTEXT_ip+4)
.byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */
do_cfa_expr(IA32_SIGCONTEXT_sp)
do_expr(0, IA32_SIGCONTEXT_ax)
do_expr(1, IA32_SIGCONTEXT_cx)
do_expr(2, IA32_SIGCONTEXT_dx)
do_expr(3, IA32_SIGCONTEXT_bx)
do_expr(5, IA32_SIGCONTEXT_bp)
do_expr(6, IA32_SIGCONTEXT_si)
do_expr(7, IA32_SIGCONTEXT_di)
do_expr(8, IA32_SIGCONTEXT_ip)
.align 4
.LENDFDEDLSI1:
.long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */
.LSTARTFDEDLSI2:
.long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */
/* HACK: See above wrt unwind library assumptions. */
.long .LSTART_rt_sigreturn-1-. /* PC-relative start address */
.long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
.uleb128 0 /* Augmentation */
/* What follows are the instructions for the table generation.
We record the locations of each register saved. This is
slightly less complicated than the above, since we don't
modify the stack pointer in the process. */
do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_sp)
do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ax)
do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_cx)
do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_dx)
do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bx)
do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bp)
do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_si)
do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_di)
do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ip)
.align 4
.LENDFDEDLSI2:
.previous

77
kernel/arch/x86/vdso/vdso32/syscall.S Normal file

@@ -0,0 +1,77 @@
/*
* Code for the vDSO. This version uses the syscall instruction.
*
* First get the common code for the sigreturn entry points.
* This must come first.
*/
#define SYSCALL_ENTER_KERNEL syscall
#include "sigreturn.S"
#include <asm/segment.h>
.text
.globl __kernel_vsyscall
.type __kernel_vsyscall,@function
ALIGN
__kernel_vsyscall:
.LSTART_vsyscall:
push %ebp
.Lpush_ebp:
movl %ecx, %ebp
syscall
movl $__USER32_DS, %ecx
movl %ecx, %ss
movl %ebp, %ecx
popl %ebp
.Lpop_ebp:
ret
.LEND_vsyscall:
.size __kernel_vsyscall,.-.LSTART_vsyscall
.section .eh_frame,"a",@progbits
.LSTARTFRAME:
.long .LENDCIE-.LSTARTCIE
.LSTARTCIE:
.long 0 /* CIE ID */
.byte 1 /* Version number */
.string "zR" /* NUL-terminated augmentation string */
.uleb128 1 /* Code alignment factor */
.sleb128 -4 /* Data alignment factor */
.byte 8 /* Return address register column */
.uleb128 1 /* Augmentation value length */
.byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
.byte 0x0c /* DW_CFA_def_cfa */
.uleb128 4
.uleb128 4
.byte 0x88 /* DW_CFA_offset, column 0x8 */
.uleb128 1
.align 4
.LENDCIE:
.long .LENDFDE1-.LSTARTFDE1 /* Length FDE */
.LSTARTFDE1:
.long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */
.long .LSTART_vsyscall-. /* PC-relative start address */
.long .LEND_vsyscall-.LSTART_vsyscall
.uleb128 0 /* Augmentation length */
/* What follows are the instructions for the table generation.
We have to record all changes of the stack pointer. */
.byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.uleb128 8
.byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */
.byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
.byte 0xc5 /* DW_CFA_restore %ebp */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.uleb128 4
.align 4
.LENDFDE1:
.previous
/*
* Pad out the segment to match the size of the sysenter.S version.
*/
VDSO32_vsyscall_eh_frame_size = 0x40
.section .data,"aw",@progbits
.space VDSO32_vsyscall_eh_frame_size-(.LENDFDE1-.LSTARTFRAME), 0
.previous

116
kernel/arch/x86/vdso/vdso32/sysenter.S Normal file

@@ -0,0 +1,116 @@
/*
* Code for the vDSO. This version uses the sysenter instruction.
*
* First get the common code for the sigreturn entry points.
* This must come first.
*/
#include "sigreturn.S"
/*
* The caller puts arg2 in %ecx, which gets pushed. The kernel will use
* %ecx itself for arg2. The pushing is because the sysexit instruction
* (found in entry.S) requires that we clobber %ecx with the desired %esp.
* User code might expect that %ecx is unclobbered though, as it would be
* for returning via the iret instruction, so we must push and pop.
*
* The caller puts arg3 in %edx, which the sysexit instruction requires
* for %eip. Thus, exactly as for arg2, we must push and pop.
*
* Arg6 is different. The caller puts arg6 in %ebp. Since the sysenter
* instruction clobbers %esp, the user's %esp won't even survive entry
* into the kernel. We store %esp in %ebp. Code in entry.S must fetch
* arg6 from the stack.
*
* You can not use this vsyscall for the clone() syscall because the
* three words on the parent stack do not get copied to the child.
*/
.text
.globl __kernel_vsyscall
.type __kernel_vsyscall,@function
ALIGN
__kernel_vsyscall:
.LSTART_vsyscall:
push %ecx
.Lpush_ecx:
push %edx
.Lpush_edx:
push %ebp
.Lenter_kernel:
movl %esp,%ebp
sysenter
/* 7: align return point with nop's to make disassembly easier */
.space 7,0x90
/* 14: System call restart point is here! (SYSENTER_RETURN-2) */
jmp .Lenter_kernel
/* 16: System call normal return point is here! */
VDSO32_SYSENTER_RETURN: /* Symbol used by sysenter.c via vdso32-syms.h */
pop %ebp
.Lpop_ebp:
pop %edx
.Lpop_edx:
pop %ecx
.Lpop_ecx:
ret
.LEND_vsyscall:
.size __kernel_vsyscall,.-.LSTART_vsyscall
.previous
.section .eh_frame,"a",@progbits
.LSTARTFRAMEDLSI:
.long .LENDCIEDLSI-.LSTARTCIEDLSI
.LSTARTCIEDLSI:
.long 0 /* CIE ID */
.byte 1 /* Version number */
.string "zR" /* NUL-terminated augmentation string */
.uleb128 1 /* Code alignment factor */
.sleb128 -4 /* Data alignment factor */
.byte 8 /* Return address register column */
.uleb128 1 /* Augmentation value length */
.byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
.byte 0x0c /* DW_CFA_def_cfa */
.uleb128 4
.uleb128 4
.byte 0x88 /* DW_CFA_offset, column 0x8 */
.uleb128 1
.align 4
.LENDCIEDLSI:
.long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
.LSTARTFDEDLSI:
.long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
.long .LSTART_vsyscall-. /* PC-relative start address */
.long .LEND_vsyscall-.LSTART_vsyscall
.uleb128 0
/* What follows are the instructions for the table generation.
We have to record all changes of the stack pointer. */
.byte 0x40 + (.Lpush_ecx-.LSTART_vsyscall) /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x08 /* RA at offset 8 now */
.byte 0x40 + (.Lpush_edx-.Lpush_ecx) /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x0c /* RA at offset 12 now */
.byte 0x40 + (.Lenter_kernel-.Lpush_edx) /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x10 /* RA at offset 16 now */
.byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */
/* Finally the epilogue. */
.byte 0x40 + (.Lpop_ebp-.Lenter_kernel) /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x0c /* RA at offset 12 now */
.byte 0xc5 /* DW_CFA_restore %ebp */
.byte 0x40 + (.Lpop_edx-.Lpop_ebp) /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x08 /* RA at offset 8 now */
.byte 0x40 + (.Lpop_ecx-.Lpop_edx) /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x04 /* RA at offset 4 now */
.align 4
.LENDFDEDLSI:
.previous
/*
* Emit a symbol with the size of this .eh_frame data,
* to verify it matches the other versions.
*/
VDSO32_vsyscall_eh_frame_size = (.LENDFDEDLSI-.LSTARTFRAMEDLSI)

37
kernel/arch/x86/vdso/vdso32/vdso32.lds.S Normal file

@@ -0,0 +1,37 @@
/*
* Linker script for 32-bit vDSO.
* We #include the file to define the layout details.
* Here we only choose the prelinked virtual address.
*
* This file defines the version script giving the user-exported symbols in
* the DSO. We can define local symbols here called VDSO* to make their
* values visible using the asm-x86/vdso.h macros from the kernel proper.
*/
#define VDSO_PRELINK 0
#include "../vdso-layout.lds.S"
/* The ELF entry point can be used to set the AT_SYSINFO value. */
ENTRY(__kernel_vsyscall);
/*
* This controls what userland symbols we export from the vDSO.
*/
VERSION
{
LINUX_2.5 {
global:
__kernel_vsyscall;
__kernel_sigreturn;
__kernel_rt_sigreturn;
local: *;
};
}
/*
* Symbols we define here called VDSO* get their values into vdso32-syms.h.
*/
VDSO32_PRELINK = VDSO_PRELINK;
VDSO32_vsyscall = __kernel_vsyscall;
VDSO32_sigreturn = __kernel_sigreturn;
VDSO32_rt_sigreturn = __kernel_rt_sigreturn;

16
kernel/arch/x86/vdso/vextern.h Normal file

@@ -0,0 +1,16 @@
#ifndef VEXTERN
#include <asm/vsyscall.h>
#define VEXTERN(x) \
extern typeof(x) *vdso_ ## x __attribute__((visibility("hidden")));
#endif
#define VMAGIC 0xfeedbabeabcdefabUL
/* Any kernel variables used in the vDSO must be exported in the main
kernel's vmlinux.lds.S/vsyscall.h/proper __section and
put into vextern.h and be referenced as a pointer with vdso prefix.
The main kernel later fills in the values. */
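/* e.g. VEXTERN(jiffies) declares:
   extern typeof(jiffies) *vdso_jiffies __attribute__((visibility("hidden"))); */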
VEXTERN(jiffies)
VEXTERN(vgetcpu_mode)
VEXTERN(vsyscall_gtod_data)

36
kernel/arch/x86/vdso/vgetcpu.c Normal file

@@ -0,0 +1,36 @@
/*
* Copyright 2006 Andi Kleen, SUSE Labs.
* Subject to the GNU Public License, v.2
*
* Fast user context implementation of getcpu()
*/
#include <linux/kernel.h>
#include <linux/getcpu.h>
#include <linux/jiffies.h>
#include <linux/time.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include "vextern.h"
notrace long
__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
{
unsigned int p;
if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
/* Load per CPU data from RDTSCP */
native_read_tscp(&p);
} else {
/* Load per CPU data from GDT */
asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
}
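/*
 * Either way, p packs the CPU number into bits 0-11 and the NUMA
 * node number into the bits above.
 */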
if (cpu)
*cpu = p & 0xfff;
if (node)
*node = p >> 12;
return 0;
}
long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
__attribute__((weak, alias("__vdso_getcpu")));

141
kernel/arch/x86/vdso/vma.c Normal file

@@ -0,0 +1,141 @@
/*
* Set up the VMAs to tell the VM about the vDSO.
* Copyright 2007 Andi Kleen, SUSE Labs.
* Subject to the GPL, v.2
*/
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include "vextern.h" /* Just for VMAGIC. */
#undef VEXTERN
unsigned int __read_mostly vdso_enabled = 1;
extern char vdso_start[], vdso_end[];
extern unsigned short vdso_sync_cpuid;
static struct page **vdso_pages;
static unsigned vdso_size;
static inline void *var_ref(void *p, char *name)
{
if (*(void **)p != (void *)VMAGIC) {
printk("VDSO: variable %s broken\n", name);
vdso_enabled = 0;
}
return p;
}
static int __init init_vdso_vars(void)
{
int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
int i;
char *vbase;
vdso_size = npages << PAGE_SHIFT;
vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
if (!vdso_pages)
goto oom;
for (i = 0; i < npages; i++) {
struct page *p;
p = alloc_page(GFP_KERNEL);
if (!p)
goto oom;
vdso_pages[i] = p;
copy_page(page_address(p), vdso_start + i*PAGE_SIZE);
}
vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL);
if (!vbase)
goto oom;
if (memcmp(vbase, "\177ELF", 4)) {
printk("VDSO: I'm broken; not ELF\n");
vdso_enabled = 0;
}
#define VEXTERN(x) \
*(typeof(__ ## x) **) var_ref(VDSO64_SYMBOL(vbase, x), #x) = &__ ## x;
#include "vextern.h"
#undef VEXTERN
return 0;
oom:
printk("Cannot allocate vdso\n");
vdso_enabled = 0;
return -ENOMEM;
}
__initcall(init_vdso_vars);
struct linux_binprm;
/* Put the vdso above the (randomized) stack with another randomized offset.
This way there is no hole in the middle of address space.
To save memory make sure it is still in the same PTE as the stack top.
This doesn't give that many random bits */
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
unsigned long addr, end;
unsigned offset;
end = (start + PMD_SIZE - 1) & PMD_MASK;
if (end >= TASK_SIZE_MAX)
end = TASK_SIZE_MAX;
end -= len;
/* This loses some more bits than a modulo, but is cheaper */
offset = get_random_int() & (PTRS_PER_PTE - 1);
addr = start + (offset << PAGE_SHIFT);
if (addr >= end)
addr = end;
return addr;
}
/* Setup a VMA at program startup for the vsyscall page.
Not called for compat tasks */
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
unsigned long addr;
int ret;
if (!vdso_enabled)
return 0;
down_write(&mm->mmap_sem);
addr = vdso_addr(mm->start_stack, vdso_size);
addr = get_unmapped_area(NULL, addr, vdso_size, 0, 0);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
}
current->mm->context.vdso = (void *)addr;
ret = install_special_mapping(mm, addr, vdso_size,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
VM_ALWAYSDUMP,
vdso_pages);
if (ret) {
current->mm->context.vdso = NULL;
goto up_fail;
}
up_fail:
up_write(&mm->mmap_sem);
return ret;
}
static __init int vdso_setup(char *s)
{
vdso_enabled = simple_strtoul(s, NULL, 0);
return 0;
}
__setup("vdso=", vdso_setup);

12
kernel/arch/x86/vdso/vvar.c Normal file

@@ -0,0 +1,12 @@
/* Define pointer to external vDSO variables.
These are part of the vDSO. The kernel fills in the real addresses
at boot time. This is done because when the vdso is linked the
kernel isn't yet and we don't know the final addresses. */
#include <linux/kernel.h>
#include <linux/time.h>
#include <asm/vsyscall.h>
#include <asm/timex.h>
#include <asm/vgtod.h>
#define VEXTERN(x) typeof (__ ## x) *const vdso_ ## x = (void *)VMAGIC;
#include "vextern.h"