/*
  seccomp-loader: secure computing loader for linux
  Copyright (C) 2004-2006  Andrea Arcangeli <andrea@cpushare.com>
  
  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation;
  only version 2.1 of the License.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

/*
 * NOTE: please don't change the exit codes (the parameters to the
 * exit() calls), they're part of the client API and the server knows their
 * semantics.
 */

#include <sys/resource.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <signal.h>
#include <limits.h>

#include <seccomp-loader.h>

//#define DEBUG

#ifdef DEBUG
#include <stdio.h>
#endif

#if !defined(__i386__) && (!defined(__powerpc__) || defined(__powerpc64__))
#error "only i686 and ppc are supported, not enough resources to port to other archs yet"
#endif

/*
 * Loader entry point: reads an object image from fd 0, maps it at fixed
 * addresses, gets seccomp enabled and then calls into the loaded bytecode.
 *
 * we can trust the header because we trust the caller.
 * the only thing we cannot trust is the bytecode
 * that we're about to run. Everything we do before
 * turning on the "secure computing for linux" kernel feature
 * (aka chroot on steroids ;), is trusted.
 *
 * Protocol as consumed by this function, in order:
 *   fd 0 -> struct obj_header (fields converted with ntohl())
 *   fd 0 -> text_size bytes of text
 *   fd 0 -> data_size bytes of initialized data
 *   fd 0 -> one byte: MAGIC_SECCOMP_PRCTL or MAGIC_SECCOMP_PROC
 *   fd 1 <- one byte: MAGIC_ASK_SECCOMP
 *   fd 0 -> one byte: MAGIC_GOT_SECCOMP
 *
 * argc and argv are not used.
 *
 * Exit codes used below (part of the client API, see the NOTE at the
 * top of the file, so none of them is changed here):
 *    0  bytecode returned (via sys_exit())
 *    1  close(2) failed, or one of the descriptors 3..9 was open
 *    2  header could not be read completely
 *    3  header failed the sanity checks
 *    4  setrlimit(RLIMIT_STACK) failed
 *    5  text mmap failed
 *    6  text could not be read completely
 *    7  data/bss mmap failed
 *    8  data read error or premature EOF
 *    9  data read returned more than requested
 *   10  checksum mismatch
 *   11  mprotect of the text failed
 *   12  heap mmap failed
 *   14  seccomp API selector byte could not be read
 *   15  PR_GET_SECCOMP did not return 0
 *   16  PR_SET_SECCOMP failed
 *   17  setrlimit(RLIMIT_CORE) failed, or unknown API selector byte
 *   18  MAGIC_ASK_SECCOMP could not be written
 *   19  reply byte could not be read
 *   20  reply byte was not MAGIC_GOT_SECCOMP
 *
 * NOTE(review): 17 is used for two unrelated failures and 13 is not used
 * at all in this file; left as is because the server knows these numbers.
 */
int main(int argc, char ** argv)
{
	struct obj_header obj_header;
	int nr, tot, i;
	char * text_addr, * data_bss_addr, * heap_addr;
	struct rlimit rlimit;
	void (*call_exec)(char * heap_addr, int, int);
	void (*signal_addr)(int);
	unsigned char magic;

	/*
	 * Build-time guard for sizeof(long) == sizeof(int): the condition
	 * is a compile-time constant, so the call only survives (and,
	 * presumably, fails to link because fail_to_link() is defined
	 * nowhere) when the two sizes differ.
	 */
	if (sizeof(long) != sizeof(int)) {
		extern void fail_to_link(void);
		fail_to_link();
	}

#if 0 /* let's do this in the parent */
	if (nice(INT_MAX) < 0)
		exit(1);
#endif
#ifndef DEBUG
	/* stderr is only kept open for the DEBUG fprintf() further down */
	if (close(2) != 0)
		exit(1);
#endif
	/*
	 * Note the inverted test: a close() that succeeds means the
	 * descriptor was open, and that is treated as fatal. Presumably
	 * this refuses to run with descriptors leaked by the parent.
	 */
	for (i = 3; i < 10; i++)
		if (close(i) == 0)
			exit(1);

	/*
	 * read() may return less than what was asked for, so keep reading
	 * until the whole header has arrived. EOF and errors are fatal.
	 */
	tot = 0;
	for (;;) {
		if (tot == sizeof(struct obj_header))
			break;
		nr = read(0, ((char *) &obj_header) + tot, sizeof(struct obj_header) - tot);
		if (nr <= 0)
			exit(2);
		tot += nr;
		/* paranoia: read() must never return more than requested */
		if (tot > sizeof(struct obj_header))
			exit(2);
	}

	/* convert to host format */
	obj_header.text_size = ntohl(obj_header.text_size);
	obj_header.data_size = ntohl(obj_header.data_size);
	obj_header.bss_size = ntohl(obj_header.bss_size);
	obj_header.heap_size = ntohl(obj_header.heap_size);
	obj_header.stack_size = ntohl(obj_header.stack_size);
	obj_header.call_address = ntohl(obj_header.call_address);
	obj_header.signal_address = ntohl(obj_header.signal_address);
	obj_header.cksum = ntohl(obj_header.cksum);

	/*
	 * just a quick robustness check, it must never happen.
	 *
	 * The data+bss limit is checked without adding the two sizes
	 * first: data_size is bounded on its own and bss_size is then
	 * compared against what is left, so two large values cannot
	 * overflow the sum and slip below MAX_DATA_BSS_SIZE. Once this
	 * check has passed, data_size + bss_size is safe to compute.
	 */
	if (obj_header.text_size <= 0 || obj_header.text_size > MAX_TEXT_SIZE ||
	    obj_header.data_size < 0 || obj_header.bss_size < 0 ||
	    obj_header.data_size > MAX_DATA_BSS_SIZE ||
	    obj_header.bss_size > MAX_DATA_BSS_SIZE - obj_header.data_size ||
	    obj_header.heap_size < 0 || obj_header.heap_size > MAX_HEAP_SIZE ||
	    obj_header.stack_size < 0 || obj_header.stack_size > MAX_STACK_SIZE)
		exit(3);
	/* both entry points must lie inside the text that gets loaded */
	if (obj_header.call_address < TEXT_ADDRESS ||
	    obj_header.call_address >= TEXT_ADDRESS + obj_header.text_size)
		exit(3);
	if (obj_header.signal_address < TEXT_ADDRESS ||
	    obj_header.signal_address >= TEXT_ADDRESS + obj_header.text_size)
		exit(3);

	/* set the stack (soft and hard limit alike) */
	rlimit.rlim_max = rlimit.rlim_cur = obj_header.stack_size;
	if (setrlimit(RLIMIT_STACK, &rlimit) < 0)
		exit(4); /* caller knows this number */

	/* disable core dumping */
	rlimit.rlim_max = rlimit.rlim_cur = 0;
	if (setrlimit(RLIMIT_CORE, &rlimit) < 0)
		exit(17); /* failed to disable core dumping */

	/*
	 * load the bytecode: the text is mapped writable at its fixed
	 * address for now, it is write protected after the checksum.
	 */
	text_addr = mmap((void *) TEXT_ADDRESS, obj_header.text_size,
			 PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
	if (text_addr == MAP_FAILED)
		exit(5);
	
	tot = 0;
	for (;;) {
		if (tot == obj_header.text_size)
			break;
		nr = read(0, text_addr + tot, obj_header.text_size - tot);
		if (nr <= 0)
			exit(6);
		tot += nr;
		if (tot > obj_header.text_size)
			exit(6);
	}

	/*
	 * load data and bss: one mapping covers both, only the first
	 * data_size bytes are read from fd 0, the bss part that follows
	 * is left as the anonymous mapping provided it.
	 */
	data_bss_addr = mmap((void *) DATA_BSS_ADDRESS, obj_header.data_size + obj_header.bss_size,
			     PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
	if (data_bss_addr == MAP_FAILED)
		exit(7);
	
	tot = 0;
	for (;;) {
		if (tot == obj_header.data_size)
			break;
		nr = read(0, data_bss_addr + tot, obj_header.data_size - tot);
		if (nr <= 0)
			exit(8);
		tot += nr;
		/* unlike the loops above, the overrun has its own exit code */
		if (tot > obj_header.data_size)
			exit(9);
	}

	/*
	 * verify checksum: a 32bit sum of every byte of the host-format
	 * header (with its cksum field cleared), of the text and of the
	 * whole data+bss mapping.
	 */
	{
		u_int8_t * p, * x;
		u_int32_t __cksum, cksum;

		/* the expected value is saved before the field is cleared */
		cksum = obj_header.cksum;
		obj_header.cksum = 0;

		__cksum = 0;

		x = (u_int8_t *) &obj_header;
		for (p = x; p < x + sizeof(struct obj_header); p++)
			__cksum += *p;

		x = (u_int8_t *) text_addr;
		for (p = x; p < x + obj_header.text_size; p++)
			__cksum += *p;

		x = (u_int8_t *) data_bss_addr;
		/* this will check that bss is zero too */
		for (p = x; p < x + obj_header.data_size + obj_header.bss_size; p++)
			__cksum += *p;

		if (__cksum != cksum)
			exit(10);
	}

	/* PROT_READ for rodata; the text is not writable from here on */
	if (mprotect((void *) TEXT_ADDRESS, obj_header.text_size, PROT_READ|PROT_EXEC) < 0)
		exit(11);

	/* load the heap, the kernel picks the address */
	heap_addr = mmap(NULL, obj_header.heap_size, PROT_READ|PROT_WRITE,
			 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if (heap_addr == MAP_FAILED)
		exit(12);

	/*
	 * set the signal: SIGQUIT is delivered to the handler that the
	 * bytecode provides at signal_address.
	 * NOTE(review): the return value of signal() is not checked and
	 * no exit code is visibly reserved for it - confirm before adding one.
	 */
	signal_addr = (void (*)(int)) ((unsigned long) obj_header.signal_address);
	signal(SIGQUIT, signal_addr);

	/* prepare the call */
	call_exec = (void (*)(char *, int, int)) ((unsigned long)
						  obj_header.call_address);

#ifdef DEBUG
	/* presumably leaves the time to attach a debugger */
	sleep(20);
#endif

	/*
	 * handle old and new seccomp APIs: one more byte from the caller
	 * selects which one is in use.
	 */
	if (read(0, &magic, 1) != 1)
		exit(14);
	if (magic == MAGIC_SECCOMP_PRCTL) {
		/*
		 * prctl: seccomp must be reported as off before mode 1
		 * (the strict mode according to prctl(2)) is requested.
		 */
		if (prctl(PR_GET_SECCOMP, 0, 0, 0, 0) != 0)
			exit(15);
		if (prctl(PR_SET_SECCOMP, 1, 0, 0, 0) != 0)
			exit(16);
	} else if (magic != MAGIC_SECCOMP_PROC)
		/*
		 * With MAGIC_SECCOMP_PROC nothing is done at this point;
		 * presumably the daemon enables seccomp from the outside
		 * during the handshake below - TODO confirm.
		 */
		exit(17);

	/* notify the daemon and require seccomp enabling */
	magic = MAGIC_ASK_SECCOMP;
	if (write(1, &magic, 1) != 1)
		exit(18);
	if (read(0, &magic, 1) != 1)
		exit(19);
	if (magic != MAGIC_GOT_SECCOMP)
		exit(20);

#ifdef DEBUG
	fprintf(stderr, "%p %p\n", *call_exec, *signal_addr);
#endif

	/*
	 * cool, seccomp is enabled now, fire the engines.
	 * glibc is forbidden from now on, only pure C code
	 * is allowed and read/write/exit/sigreturn syscalls,
	 * so call exit() with syscalls by hand.
	 */
	call_exec(heap_addr, obj_header.heap_size, obj_header.stack_size);

	/*
	 * The retval 0 is very important, only the successful completion
	 * is allowed to use a 0 exit_code.
	 * sys_exit() is not defined in this file; presumably it is the
	 * hand made exit syscall mentioned above.
	 */
	sys_exit(0);

	/* presumably never reached, keeps the compiler quiet */
	return 0;
}
