added commands as discussed with Uriel yesterday

2025-09-02 21:33:48 -07:00 · 2010-05-28 11:30:17 +01:00
parent 85bacddf77
commit fa62640154
41 changed files with 4238 additions and 16 deletions
--- a/53
+++ b/53
@@ -2,9 +2,56 @@

 include config.mk

-SUBDIRS  = lib9 yacc awk basename bc cal cat cleanname date dc du dd echo ed \
-           factor fortune fmt freq getflags grep hoc ls mk mkdir mtime primes \
-           rc read sha1sum sed seq sleep sort tail tee test touch tr troff uniq
+SUBDIRS  = lib9\
+	yacc\
+	ascii\
+	awk\
+	basename\
+	bc\
+	cal\
+	cat\
+	cleanname\
+	cmp\
+	date\
+	dc\
+	du\
+	dd\
+	diff\
+	echo\
+	ed\
+	factor\
+	fortune\
+	fmt\
+	freq\
+	getflags\
+	grep\
+	hoc\
+	join\
+	look\
+	ls\
+	mk\
+	mkdir\
+	mtime\
+	pbd\
+	primes\
+	rc\
+	read\
+	sha1sum\
+	sed\
+	seq\
+	sleep\
+	sort\
+	split\
+	strings\
+	tail\
+	tee\
+	test\
+	touch\
+	tr\
+	troff\
+	unicode\
+	uniq\
+	unutf

 all:
 	@echo 9base build options:
--- a/11
+++ b/11
@@ -1,11 +0,0 @@
-12:13 < uriel> garbeam: add dd and diff too
-12:13 < uriel> and split
-12:14 < uriel> (and join)
-12:15 < uriel> and unutf (which I just noticed, seems to be undocumented, but seems quite useful too)
-12:15 < uriel> and tcs
-12:16 < uriel> and strings
-12:18 < uriel> oh, oh, I'm finding some great bits:
-12:18 < uriel> look(1), ascii(1) and unicode(1)
-12:19 < uriel> ok, and cmp(1) is missing too
-12:23 < uriel> hah! plan9/src/cmd/index/ is really interesting (but not worth including)
-12:26 < uriel> oh! pbd! what a wonderful discovery, we certainly should add it too
--- a/ascii/Makefile
+++ b/ascii/Makefile
@@ -0,0 +1,10 @@
+# ascii - ascii unix port from plan9
+# Depends on ../lib9
+# TARG is presumably the program name consumed by ../std.mk (not shown here) - confirm.
+TARG      = ascii
+# The build/install rules themselves are expected to come from ../std.mk.
+include ../std.mk
+# Targets without a recipe; presumably hooks run by ../std.mk - confirm.
+pre-uninstall:
+
+post-install:
--- a/ascii/ascii.1
+++ b/ascii/ascii.1
@@ -0,0 +1,160 @@
+.TH ASCII 1 
+.SH NAME
+ascii, unicode \- interpret ASCII, Unicode characters
+.SH SYNOPSIS
+.B ascii
+[
+.B -8
+]
+[
+.BI -oxdb n
+]
+[
+.B -nct
+]
+[
+.I text
+]
+.PP
+.B unicode
+[
+.B -nt
+]
+.IB hexmin - hexmax
+.PP
+.B unicode
+[
+.B -t
+]
+.I hex
+[
+\&...
+]
+.PP
+.B unicode
+[
+.B -n
+]
+.I characters
+.PP
+.B look
+.I hex
+.B \*9/lib/unicode
+.SH DESCRIPTION
+.I Ascii
+prints the
+.SM ASCII 
+values corresponding to characters and
+.I vice
+.IR versa ;
+under the
+.B -8
+option, the
+.SM ISO
+Latin-1 extensions (codes 0200-0377) are included.
+The values are interpreted in a settable numeric base;
+.B -o
+specifies octal,
+.B -d
+decimal,
+.B -x
+hexadecimal (the default), and
+.BI -b n
+base
+.IR n .
+.PP
+With no arguments,
+.I ascii
+prints a table of the character set in the specified base.
+Characters of
+.I text
+are converted to their
+.SM ASCII 
+values, one per line. If, however, the first
+.I text
+argument is a valid number in the specified base, conversion
+goes the opposite way.
+Control characters are printed as two- or three-character mnemonics.
+Other options are:
+.TP
+.B -n
+Force numeric output.
+.TP
+.B -c
+Force character output.
+.TP
+.B -t
+Convert from numbers to running text; do not interpret
+control characters or insert newlines.
+.PP
+.I Unicode
+is similar; it converts between
+.SM UTF
+and character values from the Unicode Standard (see
+.IR utf (7)).
+If given a range of hexadecimal numbers,
+.I unicode
+prints a table of the specified Unicode characters \(em their values and
+.SM UTF
+representations.
+Otherwise it translates from
+.SM UTF
+to numeric value or vice versa,
+depending on the appearance of the supplied text;
+the
+.B -n
+option forces numeric output to avoid ambiguity with numeric characters.
+If converting to
+.SM UTF ,
+the characters are printed one per line unless the
+.B -t
+flag is set, in which case the output is a single string
+containing only the specified characters.
+Unlike
+.IR ascii ,
+.I unicode
+treats no characters specially.
+.PP
+The output of
+.I ascii
+and
+.I unicode
+may be unhelpful if the characters printed are not available in the current font.
+.PP
+The file
+.B \*9/lib/unicode
+contains a
+table of characters and descriptions, sorted in hexadecimal order,
+suitable for
+.IR look (1)
+on the lower case
+.I hex
+values of characters.
+.SH EXAMPLES
+.TP
+.B "ascii -d"
+Print the
+.SM ASCII 
+table base 10.
+.TP
+.B "unicode p"
+Print the hex value of `p'.
+.TP
+.B "unicode 2200-22f1"
+Print a table of miscellaneous mathematical symbols.
+.TP
+.B "look 039 \*9/lib/unicode"
+See the start of the Greek alphabet's encoding in the Unicode Standard.
+.SH FILES
+.TP
+.B \*9/lib/unicode
+table of characters and descriptions.
+.SH SOURCE
+.B \*9/src/cmd/ascii.c
+.br
+.B \*9/src/cmd/unicode.c
+.SH "SEE ALSO"
+.IR look (1),
+.IR tcs (1),
+.IR utf (7),
+.IR font (7)
--- a/ascii/ascii.c
+++ b/ascii/ascii.c
@@ -0,0 +1,181 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+#define	MAXBASE	36
+
+void	usage(void);
+void	put(int);
+void	putn(int, int);
+void	puttext(char *);
+void	putnum(char *);
+int	btoi(char *);
+int	value(int, int);
+int	isnum(char *);
+
+char *str[256]={
+	"nul",	"soh",	"stx",	"etx",	"eot",	"enq",	"ack",	"bel",
+	"bs ",	"ht ",	"nl ",	"vt ",	"np ",	"cr ",	"so ",	"si ",
+	"dle",	"dc1",	"dc2",	"dc3",	"dc4",	"nak",	"syn",	"etb",
+	"can",	"em ",	"sub",	"esc",	"fs ",	"gs ",	"rs ",	"us ",
+	"sp ",	" ! ",	" \" ",	" # ",	" $ ",	" % ",	" & ",	" ' ",
+	" ( ",	" ) ",	" * ",	" + ",	" , ",	" - ",	" . ",	" / ",
+	" 0 ",	" 1 ",	" 2 ",	" 3 ",	" 4 ",	" 5 ",	" 6 ",	" 7 ",
+	" 8 ",	" 9 ",	" : ",	" ; ",	" < ",	" = ",	" > ",	" ? ",
+	" @ ",	" A ",	" B ",	" C ",	" D ",	" E ",	" F ",	" G ",
+	" H ",	" I ",	" J ",	" K ",	" L ",	" M ",	" N ",	" O ",
+	" P ",	" Q ",	" R ",	" S ",	" T ",	" U ",	" V ",	" W ",
+	" X ",	" Y ",	" Z ",	" [ ",	" \\ ",	" ] ",	" ^ ",	" _ ",
+	" ` ",	" a ",	" b ",	" c ",	" d ",	" e ",	" f ",	" g ",
+	" h ",	" i ",	" j ",	" k ",	" l ",	" m ",	" n ",	" o ",
+	" p ",	" q ",	" r ",	" s ",	" t ",	" u ",	" v ",	" w ",
+	" x ",	" y ",	" z ",	" { ",	" | ",	" } ",	" ~ ",	"del",
+	"x80",	"x81",	"x82",	"x83",	"x84",	"x85",	"x86",	"x87",
+	"x88",	"x89",	"x8a",	"x8b",	"x8c",	"x8d",	"x8e",	"x8f",
+	"x90",	"x91",	"x92",	"x93",	"x94",	"x95",	"x96",	"x97",
+	"x98",	"x99",	"x9a",	"x9b",	"x9c",	"x9d",	"x9e",	"x9f",
+	"xa0",	" ¡ ",	" ¢ ",	" £ ",	" ¤ ",	" ¥ ",	" ¦ ",	" § ",
+	" ¨ ",	" © ",	" ª ",	" « ",	" ¬ ",	"  ",	" ® ",	" ¯ ",
+	" ° ",	" ± ",	" ² ",	" ³ ",	" ´ ",	" µ ",	" ¶ ",	" · ",
+	" ¸ ",	" ¹ ",	" º ",	" » ",	" ¼ ",	" ½ ",	" ¾ ",	" ¿ ",
+	" À ",	" Á ",	" Â ",	" Ã ",	" Ä ",	" Å ",	" Æ ",	" Ç ",
+	" È ",	" É ",	" Ê ",	" Ë ",	" Ì ",	" Í ",	" Î ",	" Ï ",
+	" Ð ",	" Ñ ",	" Ò ",	" Ó ",	" Ô ",	" Õ ",	" Ö ",	" × ",
+	" Ø ",	" Ù ",	" Ú ",	" Û ",	" Ü ",	" Ý ",	" Þ ",	" ß ",
+	" à ",	" á ",	" â ",	" ã ",	" ä ",	" å ",	" æ ",	" ç ",
+	" è ",	" é ",	" ê ",	" ë ",	" ì ",	" í ",	" î ",	" ï ",
+	" ð ",	" ñ ",	" ò ",	" ó ",	" ô ",	" õ ",	" ö ",	" ÷ ",
+	" ø ",	" ù ",	" ú ",	" û ",	" ü ",	" ý ",	" þ ",	" ÿ "
+};
+
+char Ncol[]={
+    0,0,7,5,4,4,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+};
+
+int 	nchars=128;
+int 	base=16;
+int 	ncol;
+int 	text=1;
+int	strip=0;
+Biobuf	bin;
+
+void
+main(int argc, char **argv)
+{
+	int i;
+	/* ascii: print the character table, or convert operands between characters and numbers */
+	Binit(&bin, 1, OWRITE);	/* all output is buffered on file descriptor 1 */
+	ARGBEGIN{
+	case '8':
+		nchars=256; break;	/* table covers all 256 entries of str[] */
+	case 'x':
+		base=16; break;
+	case 'o':
+		base=8; break;
+	case 'd':
+		base=10; break;
+	case 'b':
+		base=strtoul(EARGF(usage()), 0, 0);
+		if(base<2||base>MAXBASE)	/* dig[] holds exactly MAXBASE digits */
+			usage();
+		break;
+	case 'n':
+		text=0; break;	/* force numeric output */
+	case 't':
+		strip=1;	/* puttext() then writes raw bytes without newlines */
+		/* fall through */
+	case 'c':
+		text=2; break;	/* force character output; skips the isnum() guess below */
+	default:
+		usage();
+	}ARGEND
+	/* number of digits printed per value in the chosen base */
+	ncol=Ncol[base];
+	if(argc==0){
+		for(i=0;i<nchars;i++){	/* no operands: print the table, 8 entries per row */
+			put(i);
+			if((i&7)==7)
+				Bprint(&bin, "|\n");
+		}
+	}else{
+		if(text==1)	/* none of -n/-c/-t given: decide from the first operand */
+			text=isnum(argv[0]);
+		while(argc--)
+			if(text)
+				puttext(*argv++);	/* number -> character */
+			else
+				putnum(*argv++);	/* characters -> numbers */
+	}
+	Bputc(&bin, '\n');
+	exits(0);
+}
+void
+usage(void)	/* print the synopsis on fd 2 and exit; main() has no -s option, so none is listed */
+{
+	fprint(2, "usage: %s [-8] [-xod | -b8] [-nct] [--] [text]\n", argv0);
+	exits("usage");
+}
+void
+put(int i)
+{
+	/* one table cell: '|', the code i in ncol digits, a blank and its name */
+	Bputc(&bin, '|'), putn(i, ncol);
+	Bprint(&bin, " %s", str[i]);
+}
+char dig[]="0123456789abcdefghijklmnopqrstuvwxyz";
+void
+putn(int n, int ndig)
+{	/* write the low ndig base-`base` digits of n, most significant first */
+	if(ndig!=0){
+		putn(n/base, ndig-1);	/* higher-order digits go out first */
+		Bputc(&bin, dig[n%base]);
+	}
+}
+void
+puttext(char *s)
+{	/* s is a number in the current base; print the character it names */
+	int code=btoi(s)&0377;	/* low 8 bits only: str[] has 256 entries */
+	if(!strip){
+		Bprint(&bin, "%s\n", str[code]);	/* mnemonic, one per line */
+		return;
+	}
+	Bputc(&bin, code);	/* -t: the raw byte, no newline */
+}
+void
+putnum(char *s)
+{	/* print the numeric value of every byte of s, one per line */
+	for(; *s!='\0'; s++){
+		putn(*s&0377, ncol);	/* mask so bytes >= 0200 stay non-negative */
+		Bputc(&bin, '\n');
+	}
+}
+int
+btoi(char *s)
+{	/* convert digit string s from base `base`; value() exits on a bad digit */
+	int acc;
+	char *cp;
+	for(acc=0, cp=s; *cp!='\0'; cp++)
+		acc=acc*base+value(*cp, 0);
+	return acc;
+}
+int
+value(int c, int f)
+{	/* digit value of c in base `base`; if c is no digit: -1 when f is set, else exit */
+	int d;
+	for(d=0; d<base; d++)
+		if(dig[d]==c)
+			return d;
+	if(!f){
+		fprint(2, "%s: bad input char %c\n", argv0, c);
+		exits("bad");
+	}
+	return -1;
+}
+int
+isnum(char *s)
+{	/* 1 if every byte of s is a digit in base `base` (also for ""), else 0 */
+	for(; *s!='\0'; s++)
+		if(value(*s, 1)<0)
+			return 0;
+	return 1;
+}
--- a/cmp/Makefile
+++ b/cmp/Makefile
@@ -0,0 +1,10 @@
+# cmp - cmp unix port from plan9
+# Depends on ../lib9
+# TARG is presumably the program name consumed by ../std.mk (not shown here) - confirm.
+TARG      = cmp
+# The build/install rules themselves are expected to come from ../std.mk.
+include ../std.mk
+# Targets without a recipe; presumably hooks run by ../std.mk - confirm.
+pre-uninstall:
+
+post-install:
--- a/cmp/cmp.1
+++ b/cmp/cmp.1
@@ -0,0 +1,57 @@
+.TH CMP 1 
+.SH NAME
+cmp \- compare two files
+.SH SYNOPSIS
+.B cmp
+[
+.B -lsL
+]
+.I file1 file2
+[
+.I offset1
+[
+.I offset2
+]
+]
+.SH DESCRIPTION
+The two files are
+compared.
+A diagnostic results if the contents differ, otherwise
+there is no output.
+.PP
+The options are:
+.TP
+.B l
+Print the byte number (decimal) and the
+differing bytes (hexadecimal) for each difference.
+.TP
+.B s
+Print nothing for differing files,
+but set the exit status.
+.TP
+.B L
+Print the line number of the first differing byte.
+.PP
+If offsets are given,
+comparison starts at the designated byte position
+of the corresponding file.
+Offsets that begin with
+.B 0x
+are hexadecimal;
+with
+.BR 0 ,
+octal; with anything else, decimal.
+.SH SOURCE
+.B \*9/src/cmd/cmp.c
+.SH "SEE ALSO"
+.IR diff (1) 
+.SH DIAGNOSTICS
+If a file is inaccessible or missing, the exit status is
+.LR open .
+If the files are the same, the exit status is empty (true).
+If they are the same except that one is longer than the other, the exit status is
+.LR EOF .
+Otherwise
+.I cmp
+reports the position of the first disagreeing byte and the exit status is
+.LR differ .
--- a/cmp/cmp.c
+++ b/cmp/cmp.c
@@ -0,0 +1,112 @@
+#include <u.h>
+#include <libc.h>
+
+#define		BUF		65536
+
+int sflag = 0;
+int lflag = 0;
+int Lflag = 0;
+
+static void usage(void);
+
+void
+main(int argc, char *argv[])
+{
+	/* cmp: compare file1 and file2 byte by byte, optionally from given offsets */
+	int n, i, f1, f2;
+	uchar *p, *q, *b1s, *b1e, *b2s, *b2e;	/* bNs..bNe: unconsumed bytes in bufN */
+	uchar buf1[BUF], buf2[BUF];
+	vlong nc = 1, o, l = 1;	/* nc: byte number, l: line number, both 1-based */
+	char *name1, *name2;
+
+	ARGBEGIN{
+	case 's':	sflag = 1; break;
+	case 'l':	lflag = 1; break;
+	case 'L':	Lflag = 1; break;
+	default:	usage();
+	}ARGEND
+	if(argc < 2)
+		usage();
+	if((f1 = open(name1 = *argv++, OREAD)) == -1){
+		if(!sflag) perror(name1);
+		exits("open");
+	}
+	if((f2 = open(name2 = *argv++, OREAD)) == -1){
+		if(!sflag) perror(name2);
+		exits("open");
+	}
+	if(*argv){
+		o = strtoll(*argv++, 0, 0);
+		if(seek(f1, o, 0) < 0){
+			if(!sflag) perror("cmp: seek by offset1");
+			exits("seek 1");
+		}
+	}
+	if(*argv){
+		o = strtoll(*argv++, 0, 0);
+		if(seek(f2, o, 0) < 0){
+			if(!sflag) perror("cmp: seek by offset2");
+			exits("seek 2");
+		}
+	}
+	if(*argv)
+		usage();
+	b1s = b1e = buf1;
+	b2s = b2e = buf2;
+	/* each pass refills whichever buffer is drained, then compares the common prefix */
+	for(;;){
+		if(b1s >= b1e){
+			if(b1s >= &buf1[BUF])
+				b1s = buf1;
+			n = read(f1, b1s,  &buf1[BUF] - b1s);
+			b1e = b1s + n;
+		}
+		if(b2s >= b2e){
+			if(b2s >= &buf2[BUF])
+				b2s = buf2;
+			n = read(f2, b2s,  &buf2[BUF] - b2s);
+			b2e = b2s + n;
+		}
+		n = b2e - b2s;
+		if(n > b1e - b1s)
+			n = b1e - b1s;
+		if(n <= 0)	/* nothing left to compare; a read result <= 0 on either file ends up here */
+			break;
+		if(memcmp((void *)b1s, (void *)b2s, n) != 0){
+			if(sflag)
+				exits("differ");
+			for(p = b1s, q = b2s, i = 0; i < n; p++, q++, i++) {
+				if(*p != *q){
+					if(!lflag){
+						print("%s %s differ: char %lld",
+						    name1, name2, nc+i);
+						print(Lflag?" line %lld\n":"\n", l);
+						exits("differ");
+					}
+					print("%6lld 0x%.2x 0x%.2x\n", nc+i, *p, *q);
+				}
+				if(*p == '\n')	/* counted after the check: a differing newline is still on line l */
+					l++;
+			}
+		}
+		if(Lflag)	/* count only the n bytes consumed; leftover bytes are seen next pass */
+			for(p = b1s; p < b1s + n;)
+				if(*p++ == '\n')
+					l++;
+		nc += n;
+		b1s += n;
+		b2s += n;
+	}
+	if(b1e - b1s == b2e - b2s)
+		exits((char *)0);
+	if(!sflag)
+		print("EOF on %s\n", (b1e - b1s > b2e - b2s)? name2 : name1);
+	exits("EOF");
+}
+
+static void
+usage(void)	/* usage text is a diagnostic: write it to fd 2, then exit with "usage" */
+{
+	fprint(2, "Usage: cmp [-lsL] file1 file2 [offset1 [offset2] ]\n");
+	exits("usage");
+}
--- a/dd/Makefile
+++ b/dd/Makefile
@@ -0,0 +1,10 @@
+# dd - dd unix port from plan9
+# Depends on ../lib9
+# TARG is presumably the program name consumed by ../std.mk (not shown here) - confirm.
+TARG      = dd
+# The build/install rules themselves are expected to come from ../std.mk.
+include ../std.mk
+# Targets without a recipe; presumably hooks run by ../std.mk - confirm.
+pre-uninstall:
+
+post-install:
--- a/dd/dd.1
+++ b/dd/dd.1
--- a/dd/dd.c
+++ b/dd/dd.c
@@ -0,0 +1,660 @@
+#include <u.h>
+#include <libc.h>
+
+#define	BIG	2147483647
+#define	LCASE	(1<<0)
+#define	UCASE	(1<<1)
+#define	SWAB	(1<<2)
+#define NERR	(1<<3)
+#define SYNC	(1<<4)
+int	cflag;
+int	fflag;
+char	*string;
+char	*ifile;
+char	*ofile;
+char	*ibuf;
+char	*obuf;
+vlong	skip;
+vlong	oseekn;
+vlong	iseekn;
+vlong	count;
+long	files	= 1;
+long	ibs	= 512;
+long	obs	= 512;
+long	bs;
+long	cbs;
+long	ibc;
+long	obc;
+long	cbc;
+long	nifr;
+long	nipr;
+long	nofr;
+long	nopr;
+long	ntrunc;
+int dotrunc = 1;
+int	ibf;
+int	obf;
+char	*op;
+int	nspace;
+uchar	etoa[256];
+uchar	atoe[256];
+uchar	atoibm[256];
+
+void	flsh(void);
+int	match(char *s);
+vlong	number(long big);
+void	cnull(int cc);
+void	null(int c);
+void	ascii(int cc);
+void	unblock(int cc);
+void	ebcdic(int cc);
+void	ibm(int cc);
+void	block(int cc);
+void	term(void);
+void	stats(void);
+
+#define	iskey(s)	((key[0] == '-') && (strcmp(key+1, s) == 0))
+
+void
+main(int argc, char *argv[])
+{
+	void (*conv)(int);
+	char *ip;
+	char *key;
+	int a, c;
+	/* dd: parse "-key value" pairs, open input/output, then copy and convert block by block */
+	conv = null;
+	for(c=1; c<argc; c++) {
+		key = argv[c++];	/* arguments come in pairs: key, then its value */
+		if(c >= argc){
+			fprint(2, "dd: arg %s needs a value\n", key);
+			exits("arg");
+		}
+		string = argv[c];	/* number() and match() parse from this global */
+		if(iskey("ibs")) {
+			ibs = number(BIG);
+			continue;
+		}
+		if(iskey("obs")) {
+			obs = number(BIG);
+			continue;
+		}
+		if(iskey("cbs")) {
+			cbs = number(BIG);
+			continue;
+		}
+		if(iskey("bs")) {
+			bs = number(BIG);
+			continue;
+		}
+		if(iskey("if")) {
+			ifile = string;
+			continue;
+		}
+		if(iskey("of")) {
+			ofile = string;
+			continue;
+		}
+		if(iskey("trunc")) {
+			dotrunc = number(BIG);
+			continue;
+		}
+		if(iskey("skip")) {
+			skip = number(BIG);
+			continue;
+		}
+		if(iskey("seek") || iskey("oseek")) {
+			oseekn = number(BIG);
+			continue;
+		}
+		if(iskey("iseek")) {
+			iseekn = number(BIG);
+			continue;
+		}
+		if(iskey("count")) {
+			count = number(BIG);
+			continue;
+		}
+		if(iskey("files")) {
+			files = number(BIG);
+			continue;
+		}
+		if(iskey("conv")) {
+		cloop:	/* consume one comma-separated conversion name per trip */
+			if(match(","))
+				goto cloop;
+			if(*string == '\0')
+				continue;	/* whole list consumed: next argument pair */
+			if(match("ebcdic")) {
+				conv = ebcdic;
+				goto cloop;
+			}
+			if(match("ibm")) {
+				conv = ibm;
+				goto cloop;
+			}
+			if(match("ascii")) {
+				conv = ascii;
+				goto cloop;
+			}
+			if(match("block")) {
+				conv = block;
+				goto cloop;
+			}
+			if(match("unblock")) {
+				conv = unblock;
+				goto cloop;
+			}
+			if(match("lcase")) {
+				cflag |= LCASE;
+				goto cloop;
+			}
+			if(match("ucase")) {
+				cflag |= UCASE;
+				goto cloop;
+			}
+			if(match("swab")) {
+				cflag |= SWAB;
+				goto cloop;
+			}
+			if(match("noerror")) {
+				cflag |= NERR;
+				goto cloop;
+			}
+			if(match("sync")) {
+				cflag |= SYNC;
+				goto cloop;
+			}
+		}
+		fprint(2, "dd: bad arg: %s\n", key);	/* unknown key, or unknown conv name */
+		exits("arg");
+	}
+	if(conv == null && cflag&(LCASE|UCASE))	/* case folding alone still needs a per-byte pass */
+		conv = cnull;
+	if(ifile)
+		ibf = open(ifile, 0);
+	else
+		ibf = dup(0, -1);
+	if(ibf < 0) {
+		fprint(2, "dd: open %s: %r\n", ifile);
+		exits("open");
+	}
+	if(ofile){
+		if(dotrunc)
+			obf = create(ofile, 1, 0664);
+		else
+			obf = open(ofile, 1);
+		if(obf < 0) {
+			fprint(2, "dd: create %s: %r\n", ofile);
+			exits("create");
+		}
+	}else{
+		obf = dup(1, -1);
+		if(obf < 0) {
+			fprint(2, "dd: can't dup file descriptor: %s: %r\n", ofile);	/* NOTE(review): ofile is nil on this branch */
+			exits("dup");
+		}
+	}
+	if(bs)
+		ibs = obs = bs;	/* bs overrides both block sizes */
+	if(ibs == obs && conv == null)
+		fflag++;	/* fast path: blocks are written straight from the input buffer */
+	if(ibs == 0 || obs == 0) {
+		fprint(2, "dd: counts: cannot be zero\n");
+		exits("counts");
+	}
+	ibuf = sbrk(ibs);
+	if(fflag)
+		obuf = ibuf;
+	else
+		obuf = sbrk(obs);
+	sbrk(64);	/* For good measure */
+	if(ibuf == (char *)-1 || obuf == (char *)-1) {
+		fprint(2, "dd: not enough memory: %r\n");
+		exits("memory");
+	}
+	ibc = 0;
+	obc = 0;
+	cbc = 0;
+	op = obuf;
+
+/*
+	if(signal(SIGINT, SIG_IGN) != SIG_IGN)
+		signal(SIGINT, term);
+*/
+	seek(obf, obs*oseekn, 1);	/* move forward oseekn output blocks (seek type 1); result not checked */
+	seek(ibf, ibs*iseekn, 1);	/* likewise iseekn input blocks */
+	while(skip) {	/* skip input blocks by reading and discarding them */
+		read(ibf, ibuf, ibs);
+		skip--;
+	}
+	/* main copy loop: refill ibuf when it is used up, otherwise convert one byte */
+	ip = 0;
+loop:
+	if(ibc-- == 0) {	/* input buffer used up */
+		ibc = 0;
+		if(count==0 || nifr+nipr!=count) {	/* count==0: no limit; else stop reading after count records */
+			if(cflag&(NERR|SYNC))
+			for(ip=ibuf+ibs; ip>ibuf;)	/* zero the buffer before reading into it */
+				*--ip = 0;
+			ibc = read(ibf, ibuf, ibs);
+		}
+		if(ibc == -1) {
+			perror("read");
+			if((cflag&NERR) == 0) {
+				flsh();
+				term();	/* NOTE(review): term() calls exits(0), so this error path exits with success */
+			}
+			ibc = 0;
+			for(c=0; c<ibs; c++)	/* leaves ibc at the index of the last non-zero byte */
+				if(ibuf[c] != 0)
+					ibc = c;
+			stats();
+		}
+		if(ibc == 0 && --files<=0) {	/* empty read: one input "file" finished */
+			flsh();
+			term();
+		}
+		if(ibc != ibs) {
+			nipr++;	/* partial input record */
+			if(cflag&SYNC)
+				ibc = ibs;	/* treat it as a full block (buffer was zeroed above) */
+		} else
+			nifr++;	/* full input record */
+		ip = ibuf;
+		c = (ibc>>1) & ~1;	/* number of byte pairs, rounded down to an even count */
+		if(cflag&SWAB && c)
+		do {	/* swap the two bytes of each pair */
+			a = *ip++;
+			ip[-1] = *ip;
+			*ip++ = a;
+		} while(--c);
+		ip = ibuf;
+		if(fflag) {	/* obuf == ibuf here: write the block as read */
+			obc = ibc;
+			flsh();
+			ibc = 0;
+		}
+		goto loop;
+	}
+	c = 0;
+	c |= *ip++;
+	c &= 0377;	/* next input byte as a value 0..255 */
+	(*conv)(c);
+	goto loop;
+}
+
+void
+flsh(void)
+{	/* write the obc pending bytes of obuf and update the output record counts */
+	int c;
+	if(obc) {
+		c = write(obf, obuf, obc);
+		if(c != obc) {
+			if(c > 0)
+				++nopr;	/* a short write counts as a partial record */
+			perror("write");
+			stats();
+			exits("write");	/* term() would call exits(0) and hide the failure */
+		}
+		if(obc == obs)
+			nofr++;
+		else
+			nopr++;
+		obc = 0;
+	}
+}
+
+int
+match(char *s)
+{
+	char *rest;
+
+	/*
+	 * Prefix test against the global parse cursor `string`.
+	 * When all of s matches, the cursor moves past the matched
+	 * text and 1 is returned; otherwise it stays put and 0 is returned.
+	 */
+	for(rest = string; *s != '\0'; rest++, s++)
+		if(*rest != *s)
+			return 0;
+
+	string = rest;
+	return 1;
+}
+
+vlong
+number(long big)
+{
+	char *p;
+	vlong val;
+	int ch;
+
+	/*
+	 * Parse a decimal count from the global `string`.  It may be
+	 * followed by multipliers: k (x1024), b (x512) and x<number>
+	 * (times a further number, parsed recursively).  Any other
+	 * character is skipped.  Exits with "range" unless
+	 * 0 <= value < big.
+	 */
+	p = string;
+	val = 0;
+	for(; *p >= '0' && *p <= '9'; p++)
+		val = val*10 + *p - '0';
+	for(;;){
+		ch = *p++;
+		if(ch == 'k'){
+			val *= 1024;
+			continue;
+		}
+		if(ch == 'b'){
+			val *= 512;
+			continue;
+		}
+		if(ch == 'x'){
+			string = p;	/* the recursive call reads from here */
+			val *= number(BIG);
+		}else if(ch != '\0')
+			continue;	/* unrecognised suffix character: ignored */
+		if(val >= big || val < 0){
+			fprint(2, "dd: argument %lld out of range\n", val);
+			exits("range");
+		}
+		return val;
+	}
+}
+
+void
+cnull(int cc)
+{
+	/* apply the ucase, then the lcase conversion if enabled, and emit the byte */
+	int folded = cc;
+
+	if('a' <= folded && folded <= 'z' && (cflag&UCASE) != 0)
+		folded -= 'a'-'A';
+	if('A' <= folded && folded <= 'Z' && (cflag&LCASE) != 0)
+		folded += 'a'-'A';
+	null(folded);
+}
+
+void
+null(int c)
+{
+	/* append one byte to the output buffer; flush and rewind once it holds obs bytes */
+	*op++ = c;
+	obc++;
+	if(obc < obs)
+		return;
+	flsh();
+	op = obuf;
+}
+
+void
+ascii(int cc)
+{
+	int ch;
+
+	/*
+	 * Translate through etoa[], then (when cbs is set) unblock: blanks
+	 * are held back in nspace and written only if another character
+	 * follows in the same cbs-byte record; each record ends with '\n'.
+	 */
+	ch = etoa[cc];
+	if(cbs == 0) {
+		cnull(ch);
+		return;
+	}
+	if(ch != ' ') {
+		for(; nspace > 0; nspace--)
+			null(' ');
+		cnull(ch);
+	} else
+		nspace++;
+	if(++cbc >= cbs) {
+		null('\n');
+		cbc = 0;
+		nspace = 0;
+	}
+}
+
+void
+unblock(int cc)
+{
+	int ch;
+
+	/*
+	 * When cbs is set, turn cbs-byte records into lines: blanks are
+	 * held back in nspace and written only if another character
+	 * follows in the same record; each record ends with '\n'.
+	 */
+	ch = cc & 0377;
+	if(cbs == 0) {
+		cnull(ch);
+		return;
+	}
+	if(ch != ' ') {
+		for(; nspace > 0; nspace--)
+			null(' ');
+		cnull(ch);
+	} else
+		nspace++;
+	if(++cbc >= cbs) {
+		null('\n');
+		cbc = 0;
+		nspace = 0;
+	}
+}
+
+void
+ebcdic(int cc)
+{
+	int out;
+
+	/* Case-fold, translate via atoe[], then block when cbs is set: a newline
+	 * pads the record to cbs bytes with translated blanks; bytes past cbs are
+	 * dropped and each over-long record bumps ntrunc once. */
+	out = cc;
+	if((cflag&UCASE) && out>='a' && out<='z')
+		out += 'A'-'a';
+	if((cflag&LCASE) && out>='A' && out<='Z')
+		out += 'a'-'A';
+	out = atoe[out];
+	if(cbs == 0) {
+		null(out);
+		return;
+	}
+	if(cc == '\n') {
+		for(; cbc < cbs; cbc++)
+			null(atoe[' ']);
+		cbc = 0;
+		return;
+	}
+	if(cbc == cbs)
+		ntrunc++;
+	if(++cbc <= cbs)
+		null(out);
+}
+
+void
+ibm(int cc)
+{
+	int out;
+
+	/* Case-fold, translate via atoibm[], then block when cbs is set: a newline
+	 * pads the record to cbs bytes with translated blanks; bytes past cbs are
+	 * dropped and each over-long record bumps ntrunc once. */
+	out = cc;
+	if((cflag&UCASE) && out>='a' && out<='z')
+		out += 'A'-'a';
+	if((cflag&LCASE) && out>='A' && out<='Z')
+		out += 'a'-'A';
+	out = atoibm[out] & 0377;
+	if(cbs == 0) {
+		null(out);
+		return;
+	}
+	if(cc == '\n') {
+		for(; cbc < cbs; cbc++)
+			null(atoibm[' ']);
+		cbc = 0;
+		return;
+	}
+	if(cbc == cbs)
+		ntrunc++;
+	if(++cbc <= cbs)
+		null(out);
+}
+
+void
+block(int cc)
+{
+	int out;
+
+	/* Case-fold, then block when cbs is set: a newline pads the record to
+	 * cbs bytes with blanks; bytes past cbs are dropped and each over-long
+	 * record bumps ntrunc once. */
+	out = cc;
+	if((cflag&UCASE) && out>='a' && out<='z')
+		out += 'A'-'a';
+	if((cflag&LCASE) && out>='A' && out<='Z')
+		out += 'a'-'A';
+	out &= 0377;
+	if(cbs == 0) {
+		null(out);
+		return;
+	}
+	if(cc == '\n') {
+		for(; cbc < cbs; cbc++)
+			null(' ');
+		cbc = 0;
+		return;
+	}
+	if(cbc == cbs)
+		ntrunc++;
+	if(++cbc <= cbs)
+		null(out);
+}
+
+void
+term(void)
+{
+	/* print the final record counts, then exit with a null status */
+	stats();
+	exits(nil);
+}
+
+void
+stats(void)
+{
+	/* report full+partial record counts, and truncations if any, on fd 2 */
+	fprint(2, "%lud+%lud records in\n", nifr, nipr);
+	fprint(2, "%lud+%lud records out\n", nofr, nopr);
+	if(ntrunc != 0)
+		fprint(2, "%lud truncated records\n", ntrunc);
+}
+
+uchar	etoa[] =
+{
+	0000,0001,0002,0003,0234,0011,0206,0177,
+	0227,0215,0216,0013,0014,0015,0016,0017,
+	0020,0021,0022,0023,0235,0205,0010,0207,
+	0030,0031,0222,0217,0034,0035,0036,0037,
+	0200,0201,0202,0203,0204,0012,0027,0033,
+	0210,0211,0212,0213,0214,0005,0006,0007,
+	0220,0221,0026,0223,0224,0225,0226,0004,
+	0230,0231,0232,0233,0024,0025,0236,0032,
+	0040,0240,0241,0242,0243,0244,0245,0246,
+	0247,0250,0133,0056,0074,0050,0053,0041,
+	0046,0251,0252,0253,0254,0255,0256,0257,
+	0260,0261,0135,0044,0052,0051,0073,0136,
+	0055,0057,0262,0263,0264,0265,0266,0267,
+	0270,0271,0174,0054,0045,0137,0076,0077,
+	0272,0273,0274,0275,0276,0277,0300,0301,
+	0302,0140,0072,0043,0100,0047,0075,0042,
+	0303,0141,0142,0143,0144,0145,0146,0147,
+	0150,0151,0304,0305,0306,0307,0310,0311,
+	0312,0152,0153,0154,0155,0156,0157,0160,
+	0161,0162,0313,0314,0315,0316,0317,0320,
+	0321,0176,0163,0164,0165,0166,0167,0170,
+	0171,0172,0322,0323,0324,0325,0326,0327,
+	0330,0331,0332,0333,0334,0335,0336,0337,
+	0340,0341,0342,0343,0344,0345,0346,0347,
+	0173,0101,0102,0103,0104,0105,0106,0107,
+	0110,0111,0350,0351,0352,0353,0354,0355,
+	0175,0112,0113,0114,0115,0116,0117,0120,
+	0121,0122,0356,0357,0360,0361,0362,0363,
+	0134,0237,0123,0124,0125,0126,0127,0130,
+	0131,0132,0364,0365,0366,0367,0370,0371,
+	0060,0061,0062,0063,0064,0065,0066,0067,
+	0070,0071,0372,0373,0374,0375,0376,0377,
+};
+uchar	atoe[] =
+{
+	0000,0001,0002,0003,0067,0055,0056,0057,
+	0026,0005,0045,0013,0014,0015,0016,0017,
+	0020,0021,0022,0023,0074,0075,0062,0046,
+	0030,0031,0077,0047,0034,0035,0036,0037,
+	0100,0117,0177,0173,0133,0154,0120,0175,
+	0115,0135,0134,0116,0153,0140,0113,0141,
+	0360,0361,0362,0363,0364,0365,0366,0367,
+	0370,0371,0172,0136,0114,0176,0156,0157,
+	0174,0301,0302,0303,0304,0305,0306,0307,
+	0310,0311,0321,0322,0323,0324,0325,0326,
+	0327,0330,0331,0342,0343,0344,0345,0346,
+	0347,0350,0351,0112,0340,0132,0137,0155,
+	0171,0201,0202,0203,0204,0205,0206,0207,
+	0210,0211,0221,0222,0223,0224,0225,0226,
+	0227,0230,0231,0242,0243,0244,0245,0246,
+	0247,0250,0251,0300,0152,0320,0241,0007,
+	0040,0041,0042,0043,0044,0025,0006,0027,
+	0050,0051,0052,0053,0054,0011,0012,0033,
+	0060,0061,0032,0063,0064,0065,0066,0010,
+	0070,0071,0072,0073,0004,0024,0076,0341,
+	0101,0102,0103,0104,0105,0106,0107,0110,
+	0111,0121,0122,0123,0124,0125,0126,0127,
+	0130,0131,0142,0143,0144,0145,0146,0147,
+	0150,0151,0160,0161,0162,0163,0164,0165,
+	0166,0167,0170,0200,0212,0213,0214,0215,
+	0216,0217,0220,0232,0233,0234,0235,0236,
+	0237,0240,0252,0253,0254,0255,0256,0257,
+	0260,0261,0262,0263,0264,0265,0266,0267,
+	0270,0271,0272,0273,0274,0275,0276,0277,
+	0312,0313,0314,0315,0316,0317,0332,0333,
+	0334,0335,0336,0337,0352,0353,0354,0355,
+	0356,0357,0372,0373,0374,0375,0376,0377,
+};
+uchar	atoibm[] =
+{
+	0000,0001,0002,0003,0067,0055,0056,0057,
+	0026,0005,0045,0013,0014,0015,0016,0017,
+	0020,0021,0022,0023,0074,0075,0062,0046,
+	0030,0031,0077,0047,0034,0035,0036,0037,
+	0100,0132,0177,0173,0133,0154,0120,0175,
+	0115,0135,0134,0116,0153,0140,0113,0141,
+	0360,0361,0362,0363,0364,0365,0366,0367,
+	0370,0371,0172,0136,0114,0176,0156,0157,
+	0174,0301,0302,0303,0304,0305,0306,0307,
+	0310,0311,0321,0322,0323,0324,0325,0326,
+	0327,0330,0331,0342,0343,0344,0345,0346,
+	0347,0350,0351,0255,0340,0275,0137,0155,
+	0171,0201,0202,0203,0204,0205,0206,0207,
+	0210,0211,0221,0222,0223,0224,0225,0226,
+	0227,0230,0231,0242,0243,0244,0245,0246,
+	0247,0250,0251,0300,0117,0320,0241,0007,
+	0040,0041,0042,0043,0044,0025,0006,0027,
+	0050,0051,0052,0053,0054,0011,0012,0033,
+	0060,0061,0032,0063,0064,0065,0066,0010,
+	0070,0071,0072,0073,0004,0024,0076,0341,
+	0101,0102,0103,0104,0105,0106,0107,0110,
+	0111,0121,0122,0123,0124,0125,0126,0127,
+	0130,0131,0142,0143,0144,0145,0146,0147,
+	0150,0151,0160,0161,0162,0163,0164,0165,
+	0166,0167,0170,0200,0212,0213,0214,0215,
+	0216,0217,0220,0232,0233,0234,0235,0236,
+	0237,0240,0252,0253,0254,0255,0256,0257,
+	0260,0261,0262,0263,0264,0265,0266,0267,
+	0270,0271,0272,0273,0274,0275,0276,0277,
+	0312,0313,0314,0315,0316,0317,0332,0333,
+	0334,0335,0336,0337,0352,0353,0354,0355,
+	0356,0357,0372,0373,0374,0375,0376,0377,
+};
--- a/diff/Makefile
+++ b/diff/Makefile
@@ -0,0 +1,35 @@
+# diff - diff unix port from plan9
+# Depends on ../lib9
+# Built from several objects, so the rules are spelled out here instead of using ../std.mk.
+TARG      = diff
+OFILES    = diffdir.o diffio.o diffreg.o main.o
+MANFILES  = diff.1
+
+include ../config.mk
+
+all: ${TARG}
+	@strip ${TARG}
+	@echo built ${TARG}
+
+install: ${TARG}
+	@mkdir -p ${DESTDIR}${PREFIX}/bin
+	@cp -f ${TARG} ${DESTDIR}${PREFIX}/bin/
+	@chmod 755 ${DESTDIR}${PREFIX}/bin/${TARG}
+	@mkdir -p ${DESTDIR}${MANPREFIX}/man1
+	@cp -f ${MANFILES} ${DESTDIR}${MANPREFIX}/man1
+	@chmod 444 ${DESTDIR}${MANPREFIX}/man1/${MANFILES}
+# Remove exactly what install put in place; the man page lives under MANPREFIX.
+uninstall:
+	rm -f ${DESTDIR}${PREFIX}/bin/${TARG}
+	rm -f ${DESTDIR}${MANPREFIX}/man1/${MANFILES}
+
+.c.o:
+	@echo CC $*.c
+	@${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include $*.c
+
+clean:
+	rm -f ${OFILES} ${TARG}
+
+${TARG}: ${OFILES}
+	@echo LD ${TARG}
+	@${CC} ${LDFLAGS} -o ${TARG} ${OFILES} -lm -L${PREFIX}/lib -L../lib9 -l9
--- a/diff/diff.1
+++ b/diff/diff.1
@@ -0,0 +1,163 @@
+.TH DIFF 1 
+.SH NAME
+diff \- differential file comparator
+.SH SYNOPSIS
+.B diff
+[
+.B -acefmnbwr
+] file1 ... file2
+.SH DESCRIPTION
+.I Diff
+tells what lines must be changed in two files to bring them
+into agreement.
+If one file
+is a directory,
+then a file in that directory with basename the same as that of
+the other file is used.
+If both files are directories, similarly named files in the
+two directories are compared by the method of 
+.I diff
+for text
+files and
+.IR cmp (1)
+otherwise.
+If more than two file names are given, then each argument is compared
+to the last argument as above.
+The 
+.B -r
+option causes
+.I diff
+to process similarly named subdirectories recursively.
+When processing more than one file, 
+.I diff
+prefixes file differences with a single line
+listing the two differing files, in the form of
+a 
+.I diff
+command line.
+The
+.B -m
+flag causes this behavior even when processing single files.
+.PP
+The normal output contains lines of these forms:
+.IP "" 5
+.I n1
+.B a
+.I n3,n4
+.br
+.I n1,n2
+.B d
+.I n3
+.br
+.I n1,n2
+.B c
+.I n3,n4
+.PP
+These lines resemble
+.I ed
+commands to convert
+.I file1
+into
+.IR file2 .
+The numbers after the letters pertain to
+.IR file2 .
+In fact, by exchanging `a' for `d' and reading backward
+one may ascertain equally how to convert 
+.I file2
+into
+.IR file1 .
+As in 
+.IR ed ,
+identical pairs where
+.I n1
+=
+.I n2
+or
+.I n3
+=
+.I n4
+are abbreviated as a single number.
+.PP
+Following each of these lines come all the lines that are
+affected in the first file flagged by `<', 
+then all the lines that are affected in the second file
+flagged by `>'.
+.PP
+The
+.B -b
+option causes
+trailing blanks (spaces and tabs) to be ignored
+and other strings of blanks to compare equal.
+The
+.B -w
+option causes all white-space to be removed from input lines
+before applying the difference algorithm.
+.PP
+The
+.B -n
+option prefixes each range with 
+.IB file : \fR
+and inserts a space around the 
+.BR a ,
+.BR c ,
+and
+.B d
+verbs.
+The
+.B -e
+option produces a script of
+.I "a, c"
+and 
+.I d
+commands for the editor
+.IR ed ,
+which will recreate
+.I file2
+from
+.IR file1 .
+The
+.B -f
+option produces a similar script,
+not useful with
+.IR ed ,
+in the opposite order. It may, however, be
+useful as input to a stream-oriented post-processor.
+.PP
+The
+.B -c
+option includes three lines of context around each
+change, merging changes whose contexts overlap.
+The
+.B -a
+flag displays the entire file as context.
+.PP
+Except in rare circumstances,
+.I diff
+finds a smallest sufficient set of file
+differences.
+.SH FILES
+.B /tmp/diff[12]
+.SH SOURCE
+.B \*9/src/cmd/diff
+.SH "SEE ALSO"
+.IR cmp (1),
+.IR comm (1),
+.IR ed (1)
+.SH DIAGNOSTICS
+Exit status is the empty string
+for no differences,
+.L some
+for some, 
+and
+.L error
+for trouble.
+.SH BUGS
+Editing scripts produced under the
+.BR -e " or"
+.BR -f " option are naive about"
+creating lines consisting of a single `\fB.\fR'.
+.PP
+When running
+.I diff
+on directories, the notion of what is a text
+file is open to debate.
--- a/diff/diff.h
+++ b/diff/diff.h
@@ -0,0 +1,27 @@
+#define stdout bstdout
+/* From here on `stdout` in any file including this header names the Biobuf bstdout. */
+char mode;			/* '\0', 'e', 'f', 'h'; diffdir.c also tests for 'n' */
+char bflag;			/* ignore multiple and trailing blanks */
+char rflag;			/* recurse down directory trees */
+char mflag;			/* pseudo flag: doing multiple files, one dir */
+int anychange;
+extern Biobuf	stdout;
+extern int	binary;
+/* NOTE(review): mode..anychange lack `extern`, so every includer gets a tentative definition - confirm the toolchain links that (e.g. -fcommon). */
+#define MALLOC(t, n)		((t *)emalloc((n)*sizeof(t)))
+#define REALLOC(p, t, n)	((t *)erealloc((void *)(p), (n)*sizeof(t)))
+#define FREE(p)			free((void *)(p))
+/* Size of the path buffers; diffdir.c declares them as MAXPATHLEN+1 bytes. */
+#define MAXPATHLEN	1024
+/* Functions shared between the diff source files. */
+int mkpathname(char *, char *, char *);
+void *emalloc(unsigned);
+void *erealloc(void *, unsigned);
+void diff(char *, char *, int);
+void diffdir(char *, char *, int);
+void diffreg(char *, char *);
+Biobuf *prepare(int, char *);
+void panic(int, char *, ...);
+void check(Biobuf *, Biobuf *);
+void change(int, int, int, int);
+void flushchanges(void);
--- a/diff/diffdir.c
+++ b/diff/diffdir.c
@@ -0,0 +1,113 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "diff.h"
+
+static int
+itemcmp(const void *v1, const void *v2)
+{
+	/* qsort comparator: v1 and v2 point at char* elements; order them by strcmp */
+	const char *lhs = *(char *const *)v1, *rhs = *(char *const *)v2;
+	return strcmp(lhs, rhs);
+}
+
+static char **
+scandir(char *name)
+{
+	/* return a sorted, nil-terminated array holding copies of the entry names in directory name */
+	char **names = 0;
+	Dir *ents;
+	int count = 0, fd, left;
+
+	fd = open(name, OREAD);
+	if (fd < 0){
+		panic(mflag ? 0 : 2, "can't open %s\n", name);
+		return nil;
+	}
+
+	left = dirreadall(fd, &ents);
+	if (left > 0){
+		for (left--; left >= 0; left--, count++) {	/* copy names, last entry first */
+			names = REALLOC(names, char *, (count+1));
+			names[count] = MALLOC(char, strlen(ents[left].name)+1);
+			strcpy(names[count], ents[left].name);
+		}
+		free(ents);
+	}
+	names = REALLOC(names, char*, (count+1));	/* room for the terminator */
+	names[count] = 0;
+	close(fd);
+	qsort((char *)names, count, sizeof(char*), itemcmp);
+	return names;
+}
+
+static int
+isdotordotdot(char *p)
+{
+	/* 1 when p is exactly "." or "..", 0 for every other name */
+
+	if (p[0] != '.')
+		return 0;
+	if (p[1] == '\0')
+		return 1;
+	return p[1] == '.' && p[2] == '\0';
+}
+
+void
+diffdir(char *f, char *t, int level)
+{
+	char  **df, **dt, **dirf, **dirt;
+	char *from, *to;
+	int res;
+	char fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
+	/* merge-walk the sorted name lists of directories f and t, diffing names found in both */
+	df = scandir(f);
+	dt = scandir(t);
+	dirf = df;	/* keep the list heads for the cleanup at Out */
+	dirt = dt;
+	if(df == nil || dt == nil)
+		goto Out;
+	while (*df || *dt) {
+		from = *df;
+		to = *dt;
+		if (from && isdotordotdot(from)) {
+			df++;
+			continue;
+		}
+		if (to && isdotordotdot(to)) {
+			dt++;
+			continue;
+		}
+		if (!from)	/* f's list is exhausted: the rest exists only in t */
+			res = 1;
+		else if (!to)
+			res = -1;
+		else
+			res = strcmp(from, to);
+		if (res < 0) {
+			if (mode == 0 || mode == 'n')
+				Bprint(&stdout, "Only in %s: %s\n", f, from);
+			df++;
+			continue;
+		}
+		if (res > 0) {
+			if (mode == 0 || mode == 'n')
+				Bprint(&stdout, "Only in %s: %s\n", t, to);
+			dt++;
+			continue;
+		}
+		df++; dt++;	/* advance first: a non-zero mkpathname result must not stall the loop */
+		if (mkpathname(fb, f, from))
+			continue;
+		if (mkpathname(tb, t, to))
+			continue;
+		diff(fb, tb, level+1);
+	}
+Out:
+	for (df = dirf; df && *df; df++)
+		FREE(*df);
+	for (dt = dirt; dt && *dt; dt++)
+		FREE(*dt);
+	FREE(dirf);
+	FREE(dirt);
+}
--- a/diff/diffio.c
+++ b/diff/diffio.c
@@ -0,0 +1,387 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ctype.h>
+#include "diff.h"
+
+/*
+ * Per-line record.  prepare() fills in value with the hash that
+ * readhash() computes for the line; serial is not assigned in this file.
+ */
+struct line {
+	int	serial;
+	int	value;
+};
+extern struct line *file[2];
+extern int len[2];
+extern long *ixold, *ixnew;
+extern int *J;
+
+static Biobuf *input[2];
+static char *file1, *file2;
+static int firstchange;
+
+#define MAXLINELEN	4096
+#define MIN(x, y)	((x) < (y) ? (x): (y))
+
+/*
+ * Read the next line from bp into buf, which must have room for
+ * MAXLINELEN bytes.  The newline is consumed but not stored and the
+ * result is NUL-terminated.  At most MAXLINELEN-1 bytes are kept; the
+ * rest of a longer line is read and thrown away.
+ *
+ * Returns the number of bytes stored, or -1 if end of input is reached
+ * before any byte is read.
+ */
+static int
+readline(Biobuf *bp, char *buf)
+{
+	int c;
+	char *p, *e;
+
+	p = buf;
+	e = p + MAXLINELEN-1;
+	do {
+		c = Bgetc(bp);
+		if (c < 0) {
+			/* EOF: only an error if nothing was collected */
+			if (p == buf)
+				return -1;
+			break;
+		}
+		if (c == '\n')
+			break;
+		*p++ = c;
+	} while (p < e);
+	*p = 0;
+	/* buffer filled before the newline: skip the remainder of the line */
+	if (c != '\n' && c >= 0) {
+		do c = Bgetc(bp);
+		while (c >= 0 && c != '\n');
+	}
+	return p - buf;
+}
+
+#define HALFLONG 16
+#define low(x)	(x&((1L<<HALFLONG)-1))
+#define high(x)	(x>>HALFLONG)
+
+/*
+ * hashing has the effect of
+ * arranging line in 7-bit bytes and then
+ * summing 1-s complement in 16-bit hunks 
+ *
+ * Reads one line from bp (via readline, using buf as scratch space) and
+ * returns its hash.  Returns 0 when readline reports end of input, which
+ * callers use as the end-of-file signal.  bflag selects how white space
+ * contributes to the hash.
+ */
+static int
+readhash(Biobuf *bp, char *buf)
+{
+	long sum;
+	unsigned shift;
+	char *p;
+	int len, space;
+
+	sum = 1;
+	shift = 0;
+	if ((len = readline(bp, buf)) == -1)
+		return 0;
+	p = buf;
+	switch(bflag)	/* various types of white space handling */
+	{
+	case 0:
+		/* every byte counts; shift advances by 7 and wraps modulo HALFLONG */
+		while (len--) {
+			sum += (long)*p++ << (shift &= (HALFLONG-1));
+			shift += 7;
+		}
+		break;
+	case 1:
+		/*
+		 * coalesce multiple white-space
+		 */
+		for (space = 0; len--; p++) {
+			if (isspace((uchar)*p)) {
+				space++;
+				continue;
+			}
+			/* a run of white space advances the shift once, like one character */
+			if (space) {
+				shift += 7;
+				space = 0;
+			}
+			sum += (long)*p << (shift &= (HALFLONG-1));
+			shift += 7;
+		}
+		break;
+	default:
+		/*
+		 * strip all white-space
+		 */
+		while (len--) {
+			if (isspace((uchar)*p)) {
+				p++;
+				continue;
+			}
+			sum += (long)*p++ << (shift &= (HALFLONG-1));
+			shift += 7;
+		}
+		break;
+	}
+	/* fold the high half into the low half twice */
+	sum = low(sum) + high(sum);
+	return ((short)low(sum) + (short)high(sum));
+}
+
+/*
+ * Open file arg as input number i (0 or 1) and hash its lines.
+ *
+ * Returns the open Biobuf, or 0 if the file cannot be opened (after
+ * calling panic).  If the global `binary` is already set, or the first
+ * bytes of this file look binary, `binary` is left/made non-zero and the
+ * Biobuf is returned without building the line table.  Otherwise
+ * len[i], file[i] and input[i] are filled in and the file name is
+ * remembered in file1 or file2.
+ */
+Biobuf *
+prepare(int i, char *arg)
+{
+	struct line *p;
+	int j, h;
+	Biobuf *bp;
+	char *cp, buf[MAXLINELEN];
+	int nbytes;
+	Rune r;
+
+	bp = Bopen(arg, OREAD);
+	if (!bp) {
+		panic(mflag ? 0: 2, "cannot open %s: %r\n", arg);
+		return 0;
+	}
+	if (binary)
+		return bp;
+	/* sniff at most the first 1024 bytes */
+	nbytes = Bread(bp, buf, MIN(1024, MAXLINELEN));
+	if (nbytes > 0) {
+		cp = buf;
+		/* stop UTFmax bytes early so chartorune never reads past the data */
+		while (cp < buf+nbytes-UTFmax) {
+			/*
+			 * heuristic for a binary file in the
+			 * brave new UNICODE world
+			 */
+			cp += chartorune(&r, cp);
+			if (r == 0 || (r > 0x7f && r <= 0xa0)) {
+				binary++;
+				return bp;
+			}
+		}
+		Bseek(bp, 0, 0);
+	}
+	p = MALLOC(struct line, 3);
+	/*
+	 * The body bumps j and grows the table; the loop's step then
+	 * stores the hash at p[j], so lines occupy p[1..j].
+	 * readhash returning 0 ends the loop.
+	 */
+	for (j = 0; h = readhash(bp, buf); p[j].value = h)
+		p = REALLOC(p, struct line, (++j+3));
+	len[i] = j;
+	file[i] = p;
+	input[i] = bp;			/*fix*/
+	if (i == 0) {			/*fix*/
+		file1 = arg;
+		firstchange = 0;
+	}
+	else
+		file2 = arg;
+	return bp;
+}
+
+/*
+ * Rewrite buf in place with its white space squeezed out and return the
+ * new length.  When bflag is 1 each run of white space that is followed
+ * by a non-space character becomes a single blank; for any other bflag
+ * value white space is dropped entirely.
+ */
+static int
+squishspace(char *buf)
+{
+	char *src, *dst;
+	int pending;
+
+	pending = 0;
+	dst = buf;
+	for (src = buf; *src != '\0'; src++) {
+		if (isspace((uchar)*src)) {
+			pending = 1;
+		} else {
+			if (pending && bflag == 1) {
+				*dst++ = ' ';
+				pending = 0;
+			}
+			*dst++ = *src;
+		}
+	}
+	*dst = '\0';
+	return dst - buf;
+}
+
+/*
+ * need to fix up for unexpected EOF's
+ *
+ * Re-read both files line by line, recording in ixold[]/ixnew[] the
+ * running byte offset after each line (previous offset + length + 1),
+ * and clear J[f] wherever the two lines that J pairs up do not
+ * actually compare equal (after white-space squeezing when bflag is set).
+ * Note the first loop runs while f < len[0], so the entry for f == len[0]
+ * is not visited here.
+ */
+void
+check(Biobuf *bf, Biobuf *bt)
+{
+	int f, t, flen, tlen;
+	char fbuf[MAXLINELEN], tbuf[MAXLINELEN];
+
+	ixold[0] = ixnew[0] = 0;
+	for (f = t = 1; f < len[0]; f++) {
+		flen = readline(bf, fbuf);
+		ixold[f] = ixold[f-1] + flen + 1;		/* ftell(bf) */
+		if (J[f] == 0)
+			continue;
+		/* advance the new file up to and including line J[f] */
+		do {
+			tlen = readline(bt, tbuf);
+			ixnew[t] = ixnew[t-1] + tlen + 1;	/* ftell(bt) */
+		} while (t++ < J[f]);
+		if (bflag) {
+			flen = squishspace(fbuf);
+			tlen = squishspace(tbuf);
+		}
+		/* the hashes matched but the text does not: break the pairing */
+		if (flen != tlen || strcmp(fbuf, tbuf))
+			J[f] = 0;
+	}
+	/* record offsets for the rest of the new file */
+	while (t < len[1]) {
+		tlen = readline(bt, tbuf);
+		ixnew[t] = ixnew[t-1] + tlen + 1;	/* fseek(bt) */
+		t++;
+	}
+}
+
+/*
+ * Print the line range a..b on stdout: the smaller of a and b, followed
+ * by separator and b only when a is strictly below b.
+ */
+static void
+range(int a, int b, char *separator)
+{
+	int first;
+
+	first = a;
+	if (first > b)
+		first = b;
+	Bprint(&stdout, "%d", first);
+	if (b > a)
+		Bprint(&stdout, "%s%d", separator, b);
+}
+
+/*
+ * Print lines a..b of the file behind bp, each prefixed with s.
+ * f is the table of line start offsets for that file.  The range is
+ * clamped to 1..len of the file; nothing is printed when a lies past
+ * the end.
+ */
+static void
+fetch(long *f, int a, int b, Biobuf *bp, char *s)
+{
+	char text[MAXLINELEN];
+	int limit;
+
+	if(a < 1)
+		a = 1;
+	limit = (bp == input[0]) ? len[0] : len[1];
+	if(b > limit)
+		b = limit;
+	if(a > limit)
+		return;
+	Bseek(bp, f[a-1], 0);
+	for (; a <= b; a++) {
+		readline(bp, text);
+		Bprint(&stdout, "%s%s\n", s, text);
+	}
+}
+
+/*
+ * One queued change, as passed to change(): a..b is the line range
+ * fetched from the old file, c..d the range fetched from the new one.
+ */
+typedef struct Change Change;
+struct Change
+{
+	int a;
+	int b;
+	int c;
+	int d;
+};
+
+/* changes queued by change() in modes 'c' and 'a', emitted by flushchanges() */
+Change *changes;
+int nchanges;
+
+/*
+ * Report one difference: lines a..b of the old file versus lines c..d
+ * of the new file.  An empty range is expressed as first > last; a call
+ * with both ranges empty is ignored.
+ *
+ * In modes 'c' and 'a' the change is only queued in changes[] for
+ * flushchanges(); in the other modes it is printed immediately in the
+ * format selected by `mode`.
+ */
+void
+change(int a, int b, int c, int d)
+{
+	char verb;
+	char buf[4];
+	Change *ch;
+
+	if (a > b && c > d)
+		return;
+	anychange = 1;
+	/* with mflag, print a "diff ..." header once per file pair */
+	if (mflag && firstchange == 0) {
+		if(mode) {
+			buf[0] = '-';
+			buf[1] = mode;
+			buf[2] = ' ';
+			buf[3] = '\0';
+		} else {
+			buf[0] = '\0';
+		}
+		Bprint(&stdout, "diff %s%s %s\n", buf, file1, file2);
+		firstchange = 1;
+	}
+	/* a: old range empty, d: new range empty, c: both non-empty */
+	verb = a > b ? 'a': c > d ? 'd': 'c';
+	switch(mode) {
+	case 'e':
+		range(a, b, ",");
+		Bputc(&stdout, verb);
+		break;
+	case 0:
+		range(a, b, ",");
+		Bputc(&stdout, verb);
+		range(c, d, ",");
+		break;
+	case 'n':
+		Bprint(&stdout, "%s:", file1);
+		range(a, b, ",");
+		Bprint(&stdout, " %c ", verb);
+		Bprint(&stdout, "%s:", file2);
+		range(c, d, ",");
+		break;
+	case 'f':
+		Bputc(&stdout, verb);
+		range(a, b, " ");
+		break;
+	case 'c':
+	case 'a':
+		/* grow the queue in steps of 1024 entries */
+		if(nchanges%1024 == 0)
+			changes = erealloc(changes, (nchanges+1024)*sizeof(changes[0]));
+		ch = &changes[nchanges++];
+		ch->a = a;
+		ch->b = b;
+		ch->c = c;
+		ch->d = d;
+		return;
+	}
+	Bputc(&stdout, '\n');
+	/* default and 'n' modes show the old lines, then "---", then the new lines */
+	if (mode == 0 || mode == 'n') {
+		fetch(ixold, a, b, input[0], "< ");
+		if (a <= b && c <= d)
+			Bprint(&stdout, "---\n");
+	}
+	fetch(ixnew, c, d, input[1], mode == 0 || mode == 'n' ? "> ": "");
+	/* the remaining modes end inserted text with a line holding "." */
+	if (mode != 0 && mode != 'n' && c <= d)
+		Bprint(&stdout, ".\n");
+}
+
+enum
+{
+	Lines = 3		/* number of lines of context shown */
+};
+
+/*
+ * Starting at queued change i, find the run of changes that are close
+ * enough together (fewer than 2*Lines unchanged lines between one
+ * change's end and the next one's start) to share one block of context.
+ * Returns the index one past the last change of that run.
+ */
+int
+changeset(int i)
+{
+	/* only look at changes[i+1] while it is a queued entry */
+	while(i+1<nchanges && changes[i].b+1+2*Lines > changes[i+1].a)
+		i++;
+	if(i<nchanges)
+		return i+1;
+	return nchanges;
+}
+
+/*
+ * Print the changes queued by change() and empty the queue.
+ *
+ * Changes are grouped with changeset(); each group is printed as a
+ * header "file1:a,b - file2:c,d" followed by unchanged old lines
+ * prefixed "  ", removed old lines prefixed "- " and added new lines
+ * prefixed "+ ".  The ranges are widened by Lines on both sides and
+ * clamped to the files; in mode 'a' a single group covers both files
+ * completely.
+ */
+void
+flushchanges(void)
+{
+	int a, b, c, d, at;
+	int i, j;
+
+	if(nchanges == 0)
+		return;
+	
+	for(i=0; i<nchanges; ){
+		j = changeset(i);
+		a = changes[i].a-Lines;
+		b = changes[j-1].b+Lines;
+		c = changes[i].c-Lines;
+		d = changes[j-1].d+Lines;
+		if(a < 1)
+			a = 1;
+		if(c < 1)
+			c = 1;
+		if(b > len[0])
+			b = len[0];
+		if(d > len[1])
+			d = len[1];
+		if(mode == 'a'){
+			a = 1;
+			b = len[0];
+			c = 1;
+			d = len[1];
+			j = nchanges;
+		}
+		Bprint(&stdout, "%s:", file1);
+		range(a, b, ",");
+		Bprint(&stdout, " - ");
+		Bprint(&stdout, "%s:", file2);
+		range(c, d, ",");
+		Bputc(&stdout, '\n');
+		/* at tracks the next old-file line not yet printed */
+		at = a;
+		for(; i<j; i++){
+			fetch(ixold, at, changes[i].a-1, input[0], "  ");
+			fetch(ixold, changes[i].a, changes[i].b, input[0], "- ");
+			fetch(ixnew, changes[i].c, changes[i].d, input[1], "+ ");
+			at = changes[i].b+1;
+		}
+		/* trailing context after the last change of the group */
+		fetch(ixold, at, b, input[0], "  ");
+	}
+	nchanges = 0;
+}
--- a/diff/diffreg.c
+++ b/diff/diffreg.c
@@ -0,0 +1,420 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "diff.h"
+
+/*	diff - differential file comparison
+*
+*	Uses an algorithm due to Harold Stone, which finds
+*	a pair of longest identical subsequences in the two
+*	files.
+*
+*	The major goal is to generate the match vector J.
+*	J[i] is the index of the line in file1 corresponding
+*	to line i in file0. J[i] = 0 if there is no
+*	such line in file1.
+*
+*	Lines are hashed so as to work in core. All potential
+*	matches are located by sorting the lines of each file
+*	on the hash (called value). In particular, this
+*	collects the equivalence classes in file1 together.
+*	Subroutine equiv replaces the value of each line in
+*	file0 by the index of the first element of its 
+*	matching equivalence in (the reordered) file1.
+*	To save space equiv squeezes file1 into a single
+*	array member in which the equivalence classes
+*	are simply concatenated, except that their first
+*	members are flagged by changing sign.
+*
+*	Next the indices that point into member are unsorted into
+*	array class according to the original order of file0.
+*
+*	The cleverness lies in routine stone. This marches
+*	through the lines of file0, developing a vector klist
+*	of "k-candidates". At step i a k-candidate is a matched
+*	pair of lines x,y (x in file0 y in file1) such that
+*	there is a common subsequence of length k
+*	between the first i lines of file0 and the first y 
+*	lines of file1, but there is no such subsequence for
+*	any smaller y. x is the earliest possible mate to y
+*	that occurs in such a subsequence.
+*
+*	Whenever any of the members of the equivalence class of
+*	lines in file1 matable to a line in file0 has serial number 
+*	less than the y of some k-candidate, that k-candidate 
+*	with the smallest such y is replaced. The new 
+*	k-candidate is chained (via pred) to the current
+*	k-1 candidate so that the actual subsequence can
+*	be recovered. When a member has serial number greater
+*	than the y of all k-candidates, the klist is extended.
+*	At the end, the longest subsequence is pulled out
+*	and placed in the array J by unravel.
+*
+*	With J in hand, the matches there recorded are
+*	check'ed against reality to assure that no spurious
+*	matches have crept in due to hashing. If they have,
+*	they are broken, and "jackpot " is recorded--a harmless
+*	matter except that a true match for a spuriously
+*	mated line may now be unnecessarily reported as a change.
+*
+*	Much of the complexity of the program comes simply
+*	from trying to minimize core utilization and
+*	maximize the range of doable problems by dynamically
+*	allocating what is needed and reusing what is not.
+*	The core requirements for problems larger than somewhat
+*	are (in words) 2*length(file0) + length(file1) +
+*	3*(number of k-candidates installed),  typically about
+*	6n words for files of length n. 
+*/
+/* TIDY THIS UP */
+/*
+ * A k-candidate as built by newcand(): x and y are the paired line
+ * numbers and pred is the clist index of the candidate it is chained to.
+ */
+struct cand {
+	int x;
+	int y;
+	int pred;
+} cand;
+/*
+ * One line of an input file; file[0] and file[1] are the per-file
+ * tables.  prune() stores the position within the shortened file in
+ * serial.
+ */
+struct line {
+	int serial;
+	int value;
+} *file[2], line;
+int len[2];
+int binary;
+struct line *sfile[2];	/*shortened by pruning common prefix and suffix*/
+int slen[2];
+int pref, suff;	/*length of prefix and suffix*/
+int *class;	/*will be overlaid on file[0]*/
+int *member;	/*will be overlaid on file[1]*/
+int *klist;		/*will be overlaid on file[0] after class*/
+struct cand *clist;	/* merely a free storage pot for candidates */
+int clen;
+int *J;		/*will be overlaid on class*/
+long *ixold;	/*will be overlaid on klist*/
+long *ixnew;	/*will be overlaid on file[1]*/
+/* END OF SOME TIDYING */
+
+/*
+ * Sort the n elements a[1..n] (a[0] is not touched by the outer loop's
+ * starting point) into ascending order of value, with ties ordered by
+ * ascending serial.  The gap m starts from a value of the form 2^k-1
+ * derived from n and is halved each pass.
+ */
+static void	
+sort(struct line *a, int n)	/*shellsort CACM #201*/
+{
+	int m;
+	struct line *ai, *aim, *j, *k;
+	struct line w;
+	int i;
+
+	m = 0;
+	for (i = 1; i <= n; i *= 2)
+		m = 2*i - 1;
+	for (m /= 2; m != 0; m /= 2) {
+		k = a+(n-m);
+		for (j = a+1; j <= k; j++) {
+			ai = j;
+			aim = ai+m;
+			do {
+				/* already in order: the upper element has the larger key */
+				if (aim->value > ai->value ||
+				   aim->value == ai->value &&
+				   aim->serial > ai->serial)
+					break;
+				w = *ai;
+				*ai = *aim;
+				*aim = w;
+
+				/* step down one gap and keep comparing */
+				aim = ai;
+				ai -= m;
+			} while (ai > a && aim >= ai);
+		}
+	}
+}
+
+/*
+ * Scatter the values of f[1..l] back into original order:
+ * afterwards b[f[i].serial] == f[i].value for every i in 1..l.
+ * A temporary array is filled first and then copied into b.
+ */
+static void
+unsort(struct line *f, int l, int *b)
+{
+	int *tmp;
+	int k;
+
+	tmp = MALLOC(int, (l+1));
+	for (k = 1; k <= l; k++)
+		tmp[f[k].serial] = f[k].value;
+	k = 1;
+	while (k <= l) {
+		b[k] = tmp[k];
+		k++;
+	}
+	FREE(tmp);
+}
+
+/*
+ * Strip the common prefix and suffix (judged by hash value) from the
+ * two files.  Sets pref and suff to the number of matching leading and
+ * trailing lines, points sfile[j]/slen[j] at the remaining middle part
+ * of each file, and numbers the entries of that part 0..slen[j] in
+ * their serial field.
+ */
+static void
+prune(void)
+{
+	int i,j;
+
+	for(pref=0;pref<len[0]&&pref<len[1]&&
+		file[0][pref+1].value==file[1][pref+1].value;
+		pref++ ) ;
+	/* the suffix may not overlap the prefix already counted */
+	for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&&
+		file[0][len[0]-suff].value==file[1][len[1]-suff].value;
+		suff++) ;
+	for(j=0;j<2;j++) {
+		sfile[j] = file[j]+pref;
+		slen[j] = len[j]-pref-suff;
+		for(i=0;i<=slen[j];i++)
+			sfile[j][i].serial = i;
+	}
+}
+
+/*
+ * a[1..n] and b[1..m] are both sorted by value.  Replace each a[i].value
+ * by the index j of the first b entry with the same value, or 0 if b has
+ * none.  Then write into c[1..m] the serial numbers of b in sorted
+ * order, negating the first serial of every run of equal values, and
+ * terminate the list with -1.
+ */
+static void
+equiv(struct line *a, int n, struct line *b, int m, int *c)
+{
+	int i, j;
+
+	i = j = 1;
+	/* merge-style walk over the two sorted arrays */
+	while(i<=n && j<=m) {
+		if(a[i].value < b[j].value)
+			a[i++].value = 0;
+		else if(a[i].value == b[j].value)
+			a[i++].value = j;
+		else
+			j++;
+	}
+	while(i <= n)
+		a[i++].value = 0;
+	/* sentinel so the run scan below stops at the end of b */
+	b[m+1].value = 0;
+	j = 0;
+	while(++j <= m) {
+		c[j] = -b[j].serial;
+		while(b[j+1].value == b[j].value) {
+			j++;
+			c[j] = b[j].serial;
+		}
+	}
+	c[j] = -1;
+}
+
+/*
+ * Append the candidate (x, y, pred) to clist, growing the array by one
+ * element, and return the index it was stored at.
+ */
+static int
+newcand(int x, int  y, int pred)
+{
+	int idx;
+
+	clist = REALLOC(clist, struct cand, (clen+1));
+	idx = clen++;
+	clist[idx].x = x;
+	clist[idx].y = y;
+	clist[idx].pred = pred;
+	return idx;
+}
+
+/*
+ * Look for y among the y values of the candidates c[0..k] by bisection.
+ * Returns the index l with clist[c[l]].y == y when there is one;
+ * otherwise one more than the index the bisection ends on, which is
+ * k+1 when y exceeds the y of c[k].
+ */
+static int
+search(int *c, int k, int y)
+{
+	int i, j, l;
+	int t;
+
+	if(clist[c[k]].y < y)	/*quick look for typical case*/
+		return k+1;
+	i = 0;
+	j = k+1;
+	/* narrow [i, j) until the midpoint stops moving */
+	while((l=(i+j)/2) > i) {
+		t = clist[c[l]].y;
+		if(t > y)
+			j = l;
+		else if(t < y)
+			i = l;
+		else
+			return l;
+	}
+	return l+1;
+}
+
+/*
+ * Build the k-candidate list described in the comment at the top of
+ * this file.  a[1..n] holds, per line of file0, an index into b (0 for
+ * no match); b holds runs of file1 line numbers whose first member is
+ * negated.  c receives the clist index of the current k-candidate for
+ * each k.  Returns the largest k reached.
+ */
+static int
+stone(int *a, int n, int *b, int *c)
+{
+	int i, k,y;
+	int j, l;
+	int oldc, tc;
+	int oldl;
+
+	k = 0;
+	/* candidate 0 is the (0,0) sentinel */
+	c[0] = newcand(0,0,0);
+	for(i=1; i<=n; i++) {
+		j = a[i];
+		if(j==0)
+			continue;
+		/* the first member of a run is stored negated */
+		y = -b[j];
+		oldl = 0;
+		oldc = c[0];
+		do {
+			if(y <= clist[oldc].y)
+				continue;
+			l = search(c, k, y);
+			if(l!=oldl+1)
+				oldc = c[l-1];
+			if(l<=k) {
+				if(clist[c[l]].y <= y)
+					continue;
+				/* replace the l-candidate, remembering the one it displaced */
+				tc = c[l];
+				c[l] = newcand(i,y,oldc);
+				oldc = tc;
+				oldl = l;
+			} else {
+				/* y is beyond every candidate: extend the list */
+				c[l] = newcand(i,y,oldc);
+				k++;
+				break;
+			}
+		} while((y=b[++j]) > 0);	/* a non-positive entry ends the run */
+	}
+	return k;
+}
+
+/*
+ * Fill in the match vector J.  Lines inside the common prefix map to
+ * themselves, lines inside the common suffix map to the line at the
+ * same distance from the end of the other file, and everything else
+ * starts at 0.  The candidate chain beginning at clist[p] is then
+ * followed through pred until a candidate with y == 0, recording each
+ * pair (offset by pref).
+ */
+static void
+unravel(int p)
+{
+	int i;
+	struct cand *q;
+
+	for(i=0; i<=len[0]; i++) {
+		if (i <= pref)
+			J[i] = i;
+		else if (i > len[0]-suff)
+			J[i] = i+len[1]-len[0];
+		else
+			J[i] = 0;
+	}
+	for(q=clist+p;q->y!=0;q=clist+q->pred)
+		J[q->x+pref] = q->y+pref;
+}
+
+/*
+ * Walk the match vector J and call change() for every stretch of lines
+ * that is not matched consecutively.  In mode 'e' the stretches are
+ * visited from the end of the file towards the start; in every other
+ * mode from the start towards the end.  Finishes by calling
+ * flushchanges().
+ */
+static void
+output(void)
+{
+	int m, i0, i1, j0, j1;
+
+	m = len[0];
+	/* sentinels on both ends of J */
+	J[0] = 0;
+	J[m+1] = len[1]+1;
+	if (mode != 'e') {
+		for (i0 = 1; i0 <= m; i0 = i1+1) {
+			/* skip lines matched consecutively */
+			while (i0 <= m && J[i0] == J[i0-1]+1)
+				i0++;
+			j0 = J[i0-1]+1;
+			i1 = i0-1;
+			/* extend over the unmatched lines */
+			while (i1 < m && J[i1+1] == 0)
+				i1++;
+			j1 = J[i1+1]-1;
+			J[i1] = j1;
+			change(i0, i1, j0, j1);
+		}
+	}
+	else {
+		for (i0 = m; i0 >= 1; i0 = i1-1) {
+			while (i0 >= 1 && J[i0] == J[i0+1]-1 && J[i0])
+				i0--;
+			j0 = J[i0+1]-1;
+			i1 = i0+1;
+			while (i1 > 1 && J[i1-1] == 0)
+				i1--;
+			j1 = J[i1-1]+1;
+			J[i1] = j1;
+			change(i1 , i0, j1, j0);
+		}
+	}
+	/* empty old file: the loops above did nothing, so report the whole new file */
+	if (m == 0)
+		change(1, 0, 1, len[1]);
+	flushchanges();
+}
+
+#define BUF 4096
+/*
+ * Byte-wise comparison of the files underlying b1 and b2, reading
+ * directly from their file descriptors starting at offset 0.
+ * Returns 0 if the contents are identical and 1 otherwise.
+ * nc is accumulated but never read.
+ */
+static int
+cmp(Biobuf* b1, Biobuf* b2)
+{
+	int n;
+	uchar buf1[BUF], buf2[BUF];
+	int f1, f2;
+	vlong nc = 1;
+	uchar *b1s, *b1e, *b2s, *b2e;
+
+	f1 = Bfildes(b1);
+	f2 = Bfildes(b2);
+	seek(f1, 0, 0);
+	seek(f2, 0, 0);
+	/* [b1s, b1e) and [b2s, b2e) are the unread bytes in each buffer */
+	b1s = b1e = buf1;
+	b2s = b2e = buf2;
+	for(;;){
+		/* refill whichever buffer has been used up */
+		if(b1s >= b1e){
+			if(b1s >= &buf1[BUF])
+				b1s = buf1;
+			n = read(f1, b1s,  &buf1[BUF] - b1s);
+			b1e = b1s + n;
+		}
+		if(b2s >= b2e){
+			if(b2s >= &buf2[BUF])
+				b2s = buf2;
+			n = read(f2, b2s,  &buf2[BUF] - b2s);
+			b2e = b2s + n;
+		}
+		/* compare as many bytes as both buffers currently hold */
+		n = b2e - b2s;
+		if(n > b1e - b1s)
+			n = b1e - b1s;
+		if(n <= 0)
+			break;
+		if(memcmp((void *)b1s, (void *)b2s, n) != 0){
+			return 1;
+		}		
+		nc += n;
+		b1s += n;
+		b2s += n;
+	}
+	/* equal only if neither side has bytes left over */
+	if(b1e - b1s == b2e - b2s)
+		return 0;
+	return 1;	
+}
+
+/*
+ * Compare two regular files f and t and print their differences.
+ *
+ * Both files are opened and hashed with prepare().  If either looks
+ * binary they are compared byte by byte and only a one-line notice is
+ * printed.  Otherwise the match vector J is computed (prune, sort,
+ * equiv, unsort, stone, unravel), verified against the real text with
+ * check(), and turned into output by output().
+ */
+void
+diffreg(char *f, char *t)
+{
+	Biobuf *b0, *b1;
+	int k;
+
+	binary = 0;
+	/*
+	 * prepare() leaves file[i] untouched on its early-return paths, and
+	 * a previous call has already released the old tables through
+	 * class/member.  Clear the pointers so the cleanup below never
+	 * frees a stale one.
+	 */
+	file[0] = file[1] = nil;
+	b0 = prepare(0, f);
+	if (!b0)
+		return;
+	b1 = prepare(1, t);
+	if (!b1) {
+		FREE(file[0]);
+		file[0] = nil;
+		Bterm(b0);
+		return;
+	}
+	if (binary){
+		/* could use b0 and b1 but this is simpler. */
+		if (cmp(b0, b1))
+			print("binary files %s %s differ\n", f, t);
+		/* file[0] was built if only the second file turned out binary */
+		FREE(file[0]);
+		file[0] = nil;
+		Bterm(b0);
+		Bterm(b1);
+		return;
+	}
+	clen = 0;
+	prune();
+	sort(sfile[0], slen[0]);
+	sort(sfile[1], slen[1]);
+
+	/* member and class are overlaid on the storage of file[1] and file[0] */
+	member = (int *)file[1];
+	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
+	member = REALLOC(member, int, slen[1]+2);
+
+	class = (int *)file[0];
+	unsort(sfile[0], slen[0], class);
+	class = REALLOC(class, int, slen[0]+2);
+
+	klist = MALLOC(int, slen[0]+2);
+	clist = MALLOC(struct cand, 1);
+	k = stone(class, slen[0], member, klist);
+	FREE(member);
+	FREE(class);
+	/* the line tables went away with member and class */
+	file[0] = file[1] = nil;
+
+	J = MALLOC(int, len[0]+2);
+	unravel(klist[k]);
+	FREE(clist);
+	FREE(klist);
+
+	ixold = MALLOC(long, len[0]+2);
+	ixnew = MALLOC(long, len[1]+2);
+	Bseek(b0, 0, 0); Bseek(b1, 0, 0);
+	check(b0, b1);
+	output();
+	FREE(J); FREE(ixold); FREE(ixnew);
+	Bterm(b0); Bterm(b1);			/* ++++ */
+}
--- a/diff/main.c
+++ b/diff/main.c
@@ -0,0 +1,270 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "diff.h"
+
+#define	DIRECTORY(s)		((s)->qid.type&QTDIR)
+#define	REGULAR_FILE(s)		((s)->type == 'M' && !DIRECTORY(s))
+
+Biobuf	stdout;
+
+static char *tmp[] = {"/tmp/diff1XXXXXXXXXXX", "/tmp/diff2XXXXXXXXXXX"};
+static int whichtmp;
+static char *progname;
+static char usage[] = "diff [ -acefmnbwr ] file1 ... file2\n";
+
+/*
+ * Remove every temporary file handed out so far, last one first,
+ * leaving whichtmp at 0.
+ */
+static void
+rmtmpfiles(void)
+{
+	for (; whichtmp > 0; )
+		remove(tmp[--whichtmp]);
+}
+
+/*
+ * Clean up temporary files and exit.  Status 0 exits with "",
+ * status 1 with "some", anything else with "error".
+ */
+void
+done(int status)
+{
+	rmtmpfiles();
+	if (status == 0)
+		exits("");
+	if (status == 1)
+		exits("some");
+	exits("error");
+	/*NOTREACHED*/
+}
+
+/*
+ * Flush pending standard output, then print "progname: " followed by
+ * the formatted message on file descriptor 2.  If status is non-zero
+ * the program exits through done(status); if it is zero, panic returns
+ * to the caller.
+ */
+void
+panic(int status, char *fmt, ...)
+{
+	va_list arg;
+
+	/* keep the diagnostic after any output already produced */
+	Bflush(&stdout);
+
+	fprint(2, "%s: ", progname);
+	va_start(arg, fmt);
+	vfprint(2, fmt, arg);
+	va_end(arg);
+	if (status)
+		done(status);
+		/*NOTREACHED*/
+}
+
+/*
+ * Note handler installed by mktmpfile(): report the note text and exit
+ * with status 2 (which also removes the temporary files).
+ */
+static int
+catch(void *a, char *msg)
+{
+	USED(a);
+	/* msg is arbitrary text, so it must not be used as the format itself */
+	panic(2, "%s", msg);
+	return 1;
+}
+
+/*
+ * Build "path/name" in pathname, a buffer of MAXPATHLEN+1 bytes.
+ * Returns 0 on success; if the result would not fit, complains through
+ * panic (status 0, so it returns) and returns 1 with pathname untouched.
+ */
+int
+mkpathname(char *pathname, char *path, char *name)
+{
+	/* the extra 1 is the '/' separator; the buffer's +1 holds the NUL */
+	if (strlen(path) + strlen(name) + 1 > MAXPATHLEN) {
+		panic(0, "pathname %s/%s too long\n", path, name);
+		return 1;
+	}
+	sprint(pathname, "%s/%s", path, name);
+	return 0;
+}
+	
+/*
+ * Copy everything readable from file descriptor `input` into a new
+ * temporary file and return that file's name; *sb receives the Dir of
+ * the temporary file (the caller frees it).  The name is also recorded
+ * in tmp[] so that rmtmpfiles() removes the file later.
+ *
+ * Returns 0 (after calling panic) if the file cannot be created,
+ * written or stat'ed; *sb is nil in that case.
+ */
+static char *
+mktmpfile(int input, Dir **sb)
+{
+	/*
+	 * mkstemp rewrites its template in place, so it needs writable
+	 * storage and a fresh "XXXXXX" suffix on every call; the string
+	 * literals in tmp[] provide neither.
+	 */
+	static char names[sizeof tmp / sizeof tmp[0]][sizeof "/tmp/diff1XXXXXXXXXXX"];
+	int fd, i;
+	char *p;
+	char buf[8192];
+
+	*sb = nil;
+	atnotify(catch, 1);
+	if (whichtmp >= (int)(sizeof tmp / sizeof tmp[0])) {
+		panic(mflag ? 0: 2, "cannot create temporary file: too many in use\n");
+		return 0;
+	}
+	snprint(names[whichtmp], sizeof names[whichtmp], "/tmp/diff%dXXXXXXXXXXX", whichtmp+1);
+	p = tmp[whichtmp] = names[whichtmp];
+	whichtmp++;
+	fd = mkstemp(p);
+	if (fd < 0) {
+		panic(mflag ? 0: 2, "cannot create %s: %r\n", p);
+		return 0;
+	}
+	while ((i = read(input, buf, sizeof(buf))) > 0) {
+		if ((i = write(fd, buf, i)) < 0)
+			break;
+	}
+	if (i < 0) {
+		/* report before close() so %r still describes the failed read/write */
+		panic(mflag ? 0: 2, "cannot read/write %s: %r\n", p);
+		close(fd);
+		return 0;
+	}
+	*sb = dirfstat(fd);
+	if (*sb == nil) {
+		panic(mflag ? 0: 2, "cannot stat %s: %r\n", p);
+		close(fd);
+		return 0;
+	}
+	close(fd);
+	return p;
+}
+
+/*
+ * Stat `file` and return the name of the file that should actually be
+ * read, storing its Dir in *sb.
+ *
+ * If the stat fails and the name is "-", standard input is copied to a
+ * temporary file.  If the file is neither REGULAR_FILE nor DIRECTORY,
+ * it is opened and likewise copied to a temporary file.  Otherwise the
+ * original name is returned.  Returns 0 on failure.
+ */
+static char *
+statfile(char *file, Dir **sb)
+{
+	Dir *dir;
+	int input;
+
+	dir = dirstat(file);
+	if(dir == nil) {
+		/* only "-" with a stat-able fd 0 is acceptable here */
+		if (strcmp(file, "-") || (dir = dirfstat(0)) == nil) {
+			panic(mflag ? 0: 2, "cannot stat %s: %r\n", file);
+			return 0;
+		}
+		free(dir);
+		return mktmpfile(0, sb);
+	}
+	else if (!REGULAR_FILE(dir) && !DIRECTORY(dir)) {
+		free(dir);
+		if ((input = open(file, OREAD)) == -1) {
+			panic(mflag ? 0: 2, "cannot open %s: %r\n", file);
+			return 0;
+		}
+		file = mktmpfile(input, sb);
+		close(input);
+	}
+	else
+		*sb = dir;
+	return file;
+}
+
+/*
+ * Compare f with t.  level is 0 for names given on the command line
+ * and grows by one per directory level entered through diffdir().
+ *
+ * Two directories are compared with diffdir() (below the top level
+ * only when rflag is set); two regular files with diffreg().  When one
+ * side is a regular file and the other is not, the regular file is
+ * compared with the file of the same base name inside the other
+ * argument.  Temporary files created along the way are removed before
+ * returning.
+ */
+void
+diff(char *f, char *t, int level)
+{
+	char *fp, *tp, *p, fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
+	Dir *fsb, *tsb;
+
+	if ((fp = statfile(f, &fsb)) == 0)
+		goto Return;
+	if ((tp = statfile(t, &tsb)) == 0){
+		free(fsb);
+		goto Return;
+	}
+	if (DIRECTORY(fsb) && DIRECTORY(tsb)) {
+		if (rflag || level == 0)
+			diffdir(fp, tp, level);
+		else
+			Bprint(&stdout, "Common subdirectories: %s and %s\n",
+				fp, tp);
+	}
+	else if (REGULAR_FILE(fsb) && REGULAR_FILE(tsb))
+		diffreg(fp, tp);
+	else {
+		if (REGULAR_FILE(fsb)) {
+			/* p = last path component of f */
+			if ((p = utfrrune(f, '/')) == 0)
+				p = f;
+			else
+				p++;
+			if (mkpathname(tb, tp, p) == 0)
+				diffreg(fp, tb);
+		}
+		else {
+			/* p = last path component of t */
+			if ((p = utfrrune(t, '/')) == 0)
+				p = t;
+			else
+				p++;
+			if (mkpathname(fb, fp, p) == 0)
+				diffreg(fb, tp);
+		}
+	}
+	free(fsb);
+	free(tsb);
+Return:
+	rmtmpfiles();
+}
+
+/*
+ * diff entry point: parse the option letters, check the operands and
+ * compare every leading operand against the last one.  With more than
+ * two operands the last must be a directory.  Exits through
+ * done(anychange).
+ */
+void
+main(int argc, char *argv[])
+{
+	char *p;
+	int i;
+	Dir *fsb, *tsb;
+	extern int _p9usepwlibrary;
+	
+	_p9usepwlibrary = 0;
+	Binit(&stdout, 1, OWRITE);
+	progname = *argv;
+	/* options: arguments starting with '-' that are longer than "-" itself */
+	while (--argc && (*++argv)[0] == '-' && (*argv)[1]) {
+		for (p = *argv+1; *p; p++) {
+			switch (*p) {
+
+			case 'e':
+			case 'f':
+			case 'n':
+			case 'c':
+			case 'a':
+				/* output format; the last one given wins */
+				mode = *p;
+				break;
+
+			case 'w':
+				bflag = 2;
+				break;
+
+			case 'b':
+				bflag = 1;
+				break;
+
+			case 'r':
+				rflag = 1;
+				mflag = 1;
+				break;
+
+			case 'm':
+				mflag = 1;	
+				break;
+
+			case 'h':
+			default:
+				/* makes panic print "Usage: diff [...]" */
+				progname = "Usage";
+				panic(2, usage);
+			}
+		}
+	}
+	/* here argv[0..argc-1] are the operands */
+	if (argc < 2)
+		panic(2, usage, progname);
+	if ((tsb = dirstat(argv[argc-1])) == nil)
+		panic(2, "can't stat %s\n", argv[argc-1]);
+	if (argc > 2) {
+		if (!DIRECTORY(tsb))
+			panic(2, usage, progname);
+		mflag = 1;
+	}
+	else {
+		if ((fsb = dirstat(argv[0])) == nil)
+			panic(2, "can't stat %s\n", argv[0]);
+		if (DIRECTORY(fsb) && DIRECTORY(tsb))
+			mflag = 1;
+		free(fsb);
+	}
+	free(tsb);
+	for (i = 0; i < argc-1; i++)
+		diff(argv[i], argv[argc-1], 0);
+	done(anychange);
+	/*NOTREACHED*/
+}
+
+static char noroom[] = "out of memory - try diff -h\n";
+
+/*
+ * malloc wrapper: allocates n bytes and calls panic(2, ...) with the
+ * out-of-memory message if the allocation fails.
+ */
+void *
+emalloc(unsigned n)
+{
+	void *mem;
+
+	mem = malloc(n);
+	if (mem == 0)
+		panic(2, noroom);
+	return mem;
+}
+
+/*
+ * realloc wrapper: resizes p to n bytes and calls panic(2, ...) with
+ * the out-of-memory message if realloc returns 0.
+ */
+void *
+erealloc(void *p, unsigned n)
+{
+	void *mem;
+
+	mem = realloc(p, n);
+	if (mem == 0)
+		panic(2, noroom);
+	return mem;
+}
--- a/join/Makefile
+++ b/join/Makefile
@@ -0,0 +1,10 @@
+# join - join unix port from plan9
+# Depends on ../lib9
+
+TARG      = join
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
--- a/join/join.1
+++ b/join/join.1
@@ -0,0 +1,147 @@
+.TH JOIN 1
+.CT 1 files
+.SH NAME
+join \- relational database operator
+.SH SYNOPSIS
+.B join
+[
+.I options
+]
+.I file1 file2
+.SH DESCRIPTION
+.I Join
+forms, on the standard output,
+a join
+of the two relations specified by the lines of
+.I file1
+and
+.IR file2 .
+If one of the file names is
+.LR - ,
+the standard input is used.
+.PP
+.I File1
+and
+.I file2
+must be sorted in increasing
+.SM ASCII 
+collating
+sequence on the fields
+on which they are to be joined,
+normally the first in each line.
+.PP
+There is one line in the output
+for each pair of lines in
+.I file1
+and
+.I file2
+that have identical join fields.
+The output line normally consists of the common field,
+then the rest of the line from
+.IR file1 ,
+then the rest of the line from
+.IR file2 .
+.PP
+Input fields are normally separated by spaces or tabs;
+output fields by space.
+In this case, multiple separators count as one, and
+leading separators are discarded.
+.PP
+The following options are recognized, with POSIX syntax.
+.TP
+.BI -a " n
+In addition to the normal output,
+produce a line for each unpairable line in file
+.IR n ,
+where
+.I n
+is 1 or 2.
+.TP
+.BI -v " n
+Like
+.BR -a ,
+omitting output for paired lines.
+.TP
+.BI -e " s
+Replace empty output fields by string
+.IR s .
+.TP
+.BI -1 " m
+.br
+.ns
+.TP
+.BI -2 " m
+Join on the
+.IR m th
+field of
+.I file1
+or
+.IR file2 .
+.TP
+.BI -j "n m"
+Archaic equivalent for
+.BI - n " m"\f1.
+.TP
+.BI -o fields
+Each output line comprises the designated fields.
+The comma-separated field designators are either
+.BR 0 ,
+meaning the join field, or have the form
+.IR n . m ,
+where
+.I n
+is a file number and
+.I m
+is a field number.
+Archaic usage allows separate arguments for field designators.
+.PP
+.TP
+.BI -t c
+Use character
+.I c
+as the only separator (tab character) on input and output.
+Every appearance of
+.I c
+in a line is significant.
+.SH EXAMPLES
+.TP
+.L
+sort /etc/passwd | join -t: -1 1 -a 1 -e "" - bdays
+Add birthdays to the
+.B /etc/passwd
+file, leaving unknown
+birthdays empty.
+The layout of 
+.B /etc/passwd
+is given in
+.IR passwd (5);
+.B bdays
+contains sorted lines like
+.LR "ken:Feb\ 4,\ 1953" .
+.TP
+.L
+tr : ' ' </etc/passwd | sort -k 3,3 >temp
+.br
+.ns
+.TP
+.L
+join -1 3 -2 3 -o 1.1,2.1 temp temp | awk '$1 < $2'
+Print all pairs of users with identical userids.
+.SH SOURCE
+.B \*9/src/cmd/join.c
+.SH "SEE ALSO"
+.IR sort (1), 
+.IR comm (1), 
+.IR awk (1)
+.SH BUGS
+With default field separation,
+the collating sequence is that of
+.BI "sort -b"
+.BI -k y , y\f1;
+with
+.BR -t ,
+the sequence is that of
+.BI "sort -t" x
+.BI -k y , y\f1.
+.PP
+One of the files must be randomly accessible.
--- a/join/join.c
+++ b/join/join.c
@@ -0,0 +1,369 @@
+/*	join F1 F2 on stuff */
+#include <u.h>
+#include <libc.h>
+#include <stdio.h>
+#include <ctype.h>
+#define F1 0
+#define F2 1
+#define F0 3
+#define	NFLD	100	/* max field per line */
+#define comp() runecmp(ppi[F1][j1],ppi[F2][j2])
+FILE *f[2];
+Rune buf[2][BUFSIZ];	/*input lines */
+Rune *ppi[2][NFLD+1];	/* pointers to fields in lines */
+Rune *s1,*s2;
+#define j1 joinj1
+#define j2 joinj2
+
+int	j1	= 1;	/* join of this field of file 1 */
+int	j2	= 1;	/* join of this field of file 2 */
+int	olist[2*NFLD];	/* output these fields */
+int	olistf[2*NFLD];	/* from these files */
+int	no;		/* number of entries in olist */
+Rune	sep1	= ' ';	/* default field separator */
+Rune	sep2	= '\t';
+char *sepstr=" ";
+int	discard;	/* count of truncated lines */
+Rune	null[BUFSIZ]/*	= L""*/;
+int	a1;
+int 	a2;
+
+char *getoptarg(int*, char***);
+void output(int, int);
+int input(int);
+void oparse(char*);
+void error(char*, char*);
+void seek1(void), seek2(void);
+Rune *strtorune(Rune *, char *);
+
+
+/*
+ * join entry point: parse the options by hand (argv[1] is always the
+ * option under inspection), open the two input files ("-" means
+ * standard input) and run seek2() or seek1() depending on which file
+ * ftell() reports a position for.
+ */
+void
+main(int argc, char **argv)
+{
+	int i;
+
+	while (argc > 1 && argv[1][0] == '-') {
+		/* a lone "-" is a file name, not an option */
+		if (argv[1][1] == '\0')
+			break;
+		switch (argv[1][1]) {
+		case '-':
+			argc--;
+			argv++;
+			goto proceed;
+		case 'a':
+			switch(*getoptarg(&argc, &argv)) {
+			case '1':
+				a1++;
+				break;
+			case '2':
+				a2++;
+				break;
+			default:
+				error("incomplete option -a","");
+			}
+			break;
+		case 'e':
+			strtorune(null, getoptarg(&argc, &argv));
+			break;
+		case 't':
+			sepstr=getoptarg(&argc, &argv);
+			chartorune(&sep1, sepstr);
+			sep2 = sep1;
+			break;
+		case 'o':
+			/* attached or comma-separated list: parse it as one string */
+			if(argv[1][2]!=0 ||
+			   argc>2 && strchr(argv[2],',')!=0)
+				oparse(getoptarg(&argc, &argv));
+			/* otherwise consume following arguments that look like designators */
+			else for (no = 0; no<2*NFLD && argc>2; no++){
+				if (argv[2][0] == '1' && argv[2][1] == '.') {
+					olistf[no] = F1;
+					olist[no] = atoi(&argv[2][2]);
+				} else if (argv[2][0] == '2' && argv[2][1] == '.') {
+					olist[no] = atoi(&argv[2][2]);
+					olistf[no] = F2;
+				} else if (argv[2][0] == '0')
+					olistf[no] = F0;
+				else
+					break;
+				argc--;
+				argv++;
+			}
+			break;
+		case 'j':
+			if(argc <= 2)
+				break;
+			/* -j1 m, -j2 m, or -j m for both files */
+			if (argv[1][2] == '1')
+				j1 = atoi(argv[2]);
+			else if (argv[1][2] == '2')
+				j2 = atoi(argv[2]);
+			else
+				j1 = j2 = atoi(argv[2]);
+			argc--;
+			argv++;
+			break;
+		case '1':
+			j1 = atoi(getoptarg(&argc, &argv));
+			break;
+		case '2':
+			j2 = atoi(getoptarg(&argc, &argv));
+			break;
+		}
+		argc--;
+		argv++;
+	}
+proceed:
+	for (i = 0; i < no; i++)
+		if (olist[i]-- > NFLD)	/* 0 origin */
+			error("field number too big in -o","");
+	if (argc != 3)
+		error("usage: join [-1 x -2 y] [-o list] file1 file2","");
+	j1--;
+	j2--;	/* everyone else believes in 0 origin */
+	s1 = ppi[F1][j1];
+	s2 = ppi[F2][j2];
+	if (strcmp(argv[1], "-") == 0)
+		f[F1] = stdin;
+	else if ((f[F1] = fopen(argv[1], "r")) == 0)
+		error("can't open %s", argv[1]);
+	if(strcmp(argv[2], "-") == 0) {
+		f[F2] = stdin;
+	} else if ((f[F2] = fopen(argv[2], "r")) == 0)
+		error("can't open %s", argv[2]);
+
+	/* prefer rewinding file 2; fall back to file 1 */
+	if(ftell(f[F2]) >= 0)
+		seek2();
+	else if(ftell(f[F1]) >= 0)
+		seek1();
+	else
+		error("neither file is randomly accessible","");
+	if (discard)
+		error("some input line was truncated", "");
+	exits("");
+}
+/* compare two NUL-terminated Rune strings; returns -1, 0 or 1 */
+int runecmp(Rune *a, Rune *b){
+	for(; *a==*b; a++, b++)
+		if(*a=='\0')
+			return 0;
+	return *a<*b ? -1 : 1;
+}
+/* encode the NUL-terminated Rune string r into buf with runetochar; returns buf */
+char *runetostr(char *buf, Rune *r){
+	char *out;
+
+	out = buf;
+	while(*r){
+		out += runetochar(out, r);
+		r++;
+	}
+	*out = '\0';
+	return buf;
+}
+/* decode the NUL-terminated string s into buf with chartorune; returns buf */
+Rune *strtorune(Rune *buf, char *s){
+	Rune *out;
+
+	out = buf;
+	while(*s){
+		s += chartorune(out, s);
+		out++;
+	}
+	*out = '\0';
+	return buf;
+}
+/* lazy.  there ought to be a clean way to combine seek1 & seek2 */
+#define get1() n1=input(F1)
+#define get2() n2=input(F2)
+/*
+ * Merge-join the two inputs when file 2 can be repositioned.
+ * n1 and n2 are the field counts of the current line of each file
+ * (0 at end of input).  bot2 is the file-2 offset where the current
+ * run of equal keys starts and top2 the offset of the first line
+ * after that run, so the run can be replayed for every file-1 line
+ * carrying the same key.
+ */
+void
+seek2(void)
+{
+	int n1, n2;
+	int top2=0;
+	int bot2 = ftell(f[F2]);
+	get1();
+	get2();
+	while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
+		if(n1>0 && n2>0 && comp()>0 || n1==0) {
+			/* file-2 line has no partner */
+			if(a2) output(0, n2);
+			bot2 = ftell(f[F2]);
+			get2();
+		} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
+			/* file-1 line has no partner */
+			if(a1) output(n1, 0);
+			get1();
+		} else /*(n1>0 && n2>0 && comp()==0)*/ {
+			/* pair the file-1 line with the whole run in file 2 */
+			while(n2>0 && comp()==0) {
+				output(n1, n2);
+				top2 = ftell(f[F2]);
+				get2();
+			}
+			/* rewind to the start of the run and try the next file-1 line */
+			fseek(f[F2], bot2, 0);
+			get2();
+			get1();
+			for(;;) {
+				if(n1>0 && n2>0 && comp()==0) {
+					output(n1, n2);
+					get2();
+				} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
+					/* run exhausted for this file-1 line: replay it for the next one */
+					fseek(f[F2], bot2, 0);
+					get2();
+					get1();
+				} else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
+					/* file 1 moved past the key: continue after the run */
+					fseek(f[F2], top2, 0);
+					bot2 = top2;
+					get2();
+					break;
+				}
+			}
+		}
+	}
+}
+/*
+ * Merge-join the two inputs when file 1 (rather than file 2) can be
+ * repositioned; the mirror image of seek2().
+ * n1 and n2 are the field counts of the current line of each file
+ * (0 at end of input).  bot1 is the file-1 offset where the current
+ * run of equal keys starts and top1 the offset of the first line
+ * after that run, so the run can be replayed for every file-2 line
+ * carrying the same key.
+ */
+void
+seek1(void)
+{
+	int n1, n2;
+	int top1=0;
+	int bot1 = ftell(f[F1]);
+	get1();
+	get2();
+	while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
+		if(n1>0 && n2>0 && comp()>0 || n1==0) {
+			/* file-2 line has no partner */
+			if(a2) output(0, n2);
+			get2();
+		} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
+			/* file-1 line has no partner */
+			if(a1) output(n1, 0);
+			bot1 = ftell(f[F1]);
+			get1();
+		} else /*(n1>0 && n2>0 && comp()==0)*/ {
+			/*
+			 * Pair the file-2 line with the whole run in file 1.
+			 * This loop advances file 1, so it must stop when
+			 * file 1 runs out: at end of input the old fields
+			 * stay in place and would keep comparing equal.
+			 */
+			while(n1>0 && comp()==0) {
+				output(n1, n2);
+				top1 = ftell(f[F1]);
+				get1();
+			}
+			/* rewind to the start of the run and try the next file-2 line */
+			fseek(f[F1], bot1, 0);
+			get2();
+			get1();
+			for(;;) {
+				if(n1>0 && n2>0 && comp()==0) {
+					output(n1, n2);
+					get1();
+				} else if(n1>0 && n2>0 && comp()>0 || n1==0) {
+					/* run exhausted for this file-2 line: replay it for the next one */
+					fseek(f[F1], bot1, 0);
+					get2();
+					get1();
+				} else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{
+					/* file 2 moved past the key: continue after the run */
+					fseek(f[F1], top1, 0);
+					bot1 = top1;
+					get1();
+					break;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Read the next line of file n (F1 or F2), convert it to Runes in
+ * buf[n] and split it in place into fields whose start pointers are
+ * stored in ppi[n], followed by a 0 pointer.
+ * Returns the number of fields, or 0 at end of input.  discard is
+ * incremented whenever splitting stops on anything but a newline.
+ */
+int
+input(int n)		/* get input line and split into fields */
+{
+	register int i, c;
+	Rune *bp;
+	Rune **pp;
+	char line[BUFSIZ];
+
+	bp = buf[n];
+	pp = ppi[n];
+	if (fgets(line, BUFSIZ, f[n]) == 0)
+		return(0);
+	strtorune(bp, line);
+	i = 0;
+	do {
+		i++;
+		/* with the default separator, runs of blanks and tabs count as one */
+		if (sep1 == ' ')	/* strip multiples */
+			while ((c = *bp) == sep1 || c == sep2)
+				bp++;	/* skip blanks */
+		*pp++ = bp;	/* record beginning */
+		while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
+			bp++;
+		*bp++ = '\0';	/* mark end by overwriting blank */
+	} while (c != '\n' && c != '\0' && i < NFLD-1);
+	if (c != '\n')
+		discard++;
+
+	*pp = 0;
+	return(i);
+}
+
+/*
+ * Print one output line.  on1 and on2 are the field counts of the
+ * current lines of file 1 and file 2; 0 means that file contributes
+ * nothing to this line.
+ *
+ * Without an -o list (no <= 0) the join field is printed first,
+ * followed by the remaining fields of file 1 and then of file 2.
+ * With an -o list each designator is printed in turn; a field that is
+ * missing or empty is replaced by the -e string (null).
+ */
+void
+output(int on1, int on2)	/* print items from olist */
+{
+	int i, fld, have;
+	Rune *temp;
+	char buf[BUFSIZ];
+
+	if (no <= 0) {	/* default case */
+		printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2]));
+		for (i = 0; i < on1; i++)
+			if (i != j1)
+				printf("%s%s", sepstr, runetostr(buf, ppi[F1][i]));
+		for (i = 0; i < on2; i++)
+			if (i != j2)
+				printf("%s%s", sepstr, runetostr(buf, ppi[F2][i]));
+		printf("\n");
+	} else {
+		for (i = 0; i < no; i++) {
+			if (olistf[i] == F0) {
+				/*
+				 * Join field: take it from whichever line has it.
+				 * F0 is not a valid row of ppi, so when neither
+				 * line has the field fall back to the -e string
+				 * instead of indexing ppi with it.
+				 */
+				if (on1 > j1)
+					temp = ppi[F1][j1];
+				else if (on2 > j2)
+					temp = ppi[F2][j2];
+				else
+					temp = null;
+			} else {
+				fld = olist[i];
+				have = olistf[i]==F1 ? on1 : on2;
+				/* check the index before touching ppi; "n.0" yields -1 here */
+				if (fld < 0 || fld >= have)
+					temp = null;
+				else {
+					temp = ppi[olistf[i]][fld];
+					if (*temp == 0)
+						temp = null;
+				}
+			}
+			printf("%s", runetostr(buf, temp));
+			if (i == no - 1)
+				printf("\n");
+			else
+				printf("%s", sepstr);
+		}
+	}
+}
+
+/*
+ * Print "join: " followed by s1 on stderr and exit with s1 as the exit
+ * string.  s1 is used as a printf format with s2 as its single
+ * argument, so callers must pass a format they control.
+ */
+void
+error(char *s1, char *s2)
+{
+	fprintf(stderr, "join: ");
+	fprintf(stderr, s1, s2);
+	fprintf(stderr, "\n");
+	exits(s1);
+}
+
+/*
+ * Return the argument of the option in (*argvp)[1].  If text follows
+ * the option letter in the same word, that text is the argument;
+ * otherwise the next word is used and *argcp / *argvp are stepped past
+ * the option so that the caller's own decrement consumes the argument.
+ * A missing next word, or one starting with '-', is reported through
+ * error().
+ */
+char *
+getoptarg(int *argcp, char ***argvp)
+{
+	char **av;
+
+	av = *argvp;
+	if(av[1][2] != 0)
+		return &av[1][2];
+	if(*argcp<=2 || av[2][0]=='-')
+		error("incomplete option %s", av[1]);
+	*argcp -= 1;
+	*argvp = av+1;
+	return av[2];
+}
+
+/*
+ * Parse a comma-separated -o list such as "0,1.2,2.10" into olistf[]
+ * (F0, F1 or F2) and olist[] (the field number as written), setting no
+ * to the number of designators.  Anything that is not "0" or
+ * "<1|2>.<digits>" is reported through error().
+ */
+void
+oparse(char *s)
+{
+	for (no = 0; no<2*NFLD && *s; no++, s++) {
+		switch(*s) {
+		case 0:
+			return;
+		case '0':
+			olistf[no] = F0;
+			break;
+		case '1':
+		case '2':
+			if(s[1] == '.' && isdigit((uchar)s[2])) {
+				olistf[no] = *s=='1'? F1: F2;
+				olist[no] = atoi(s += 2);
+				/*
+				 * atoi consumed every digit; leave s on the last
+				 * one so multi-digit field numbers are accepted.
+				 */
+				while(isdigit((uchar)s[1]))
+					s++;
+				break;
+			} /* fall thru */
+		default:
+			error("invalid -o list", "");
+		}
+		/* step onto the comma; the loop increment then moves past it */
+		if(s[1] == ',')
+			s++;
+	}
+}
--- a/lib9/utf.h
+++ b/lib9/utf.h
@@ -11,7 +11,8 @@ enum
 	UTFmax		= 3,		/* maximum bytes per rune */
 	Runesync	= 0x80,		/* cannot represent part of a UTF sequence (<) */
 	Runeself	= 0x80,		/* rune and UTF sequences are the same (<) */
-	Runeerror	= 0xFFFD		/* decoding error in UTF */
+	Runeerror	= 0xFFFD,		/* decoding error in UTF */
+	Runemax = 0x10FFFF	/* maximum rune value */
 };

 /* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */
--- a/look/Makefile
+++ b/look/Makefile
@@ -0,0 +1,10 @@
+# look - look unix port from plan9
+# Depends on ../lib9
+
+TARG      = look
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
--- a/look/look.1
+++ b/look/look.1
@@ -0,0 +1,85 @@
+.TH LOOK 1
+.SH NAME
+look \- find lines in a sorted list
+.SH SYNOPSIS
+.B look
+[
+.BI -dfnixt c
+]
+[
+.I string
+]
+[
+.I file
+]
+.SH DESCRIPTION
+.I Look
+consults a sorted
+.I file
+and prints all lines that begin with
+.IR string .
+It uses binary search.
+.PP
+The following options are recognized.
+Options
+.B dfnt
+affect comparisons as in
+.IR  sort (1).
+.TP
+.B -i
+Interactive.
+There is no
+.I string
+argument; instead
+.I look
+takes lines from the standard input as strings to be looked up.
+.TP
+.B -x
+Exact.
+Print only lines of the file whose key matches
+.I string
+exactly.
+.TP
+.B  -d
+`Directory' order:
+only letters, digits,
+tabs and blanks participate in comparisons.
+.TP
+.B  -f
+Fold.
+Upper case letters compare equal to lower case.
+.TP
+.B -n
+Numeric comparison with initial string of digits, optional minus sign,
+and optional decimal point.
+.TP
+.BR -t [ \f2c\f1 ]
+Character
+.I c
+terminates the sort key in the
+.IR file .
+By default, tab terminates the key.  If
+.I c
+is missing the entire line comprises the key.
+.PP
+If no
+.I file
+is specified,
+.B /lib/words
+is assumed, with collating sequence
+.BR df .
+.SH FILES
+.B /lib/words
+.SH SOURCE
+.B \*9/src/cmd/look.c
+.SH "SEE ALSO"
+.IR sort (1), 
+.IR grep (1)
+.SH DIAGNOSTICS
+The exit status is
+.RB `` "not found" ''
+if no match is found, and
+.RB `` "no dictionary" ''
+if
+.I file
+or the default dictionary cannot be opened.
--- a/look/look.c
+++ b/look/look.c
@@ -0,0 +1,349 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+	/* Macros for Rune support of ctype.h-like functions */
+
+/* drop any existing definitions so the versions below are used */
+#undef isupper
+#undef islower
+#undef isalpha
+#undef isdigit
+#undef isalnum
+#undef isspace
+#undef tolower
+/* the letter and digit tests below accept ASCII ranges only */
+#define	isupper(r)	('A' <= (r) && (r) <= 'Z')
+#define	islower(r)	('a' <= (r) && (r) <= 'z')
+#define	isalpha(r)	(isupper(r) || islower(r))
+/* true for the range covered by latin_fold_tab (0xC0..0xFF) */
+#define	islatin1(r)	(0xC0 <= (r) && (r) <= 0xFF)
+
+#define	isdigit(r)	('0' <= (r) && (r) <= '9')
+
+#define	isalnum(r)	(isalpha(r) || isdigit(r))
+
+/* blank, tab, and the control characters 0x0A through 0x0D */
+#define	isspace(r)	((r) == ' ' || (r) == '\t' \
+			|| (0x0A <= (r) && (r) <= 0x0D))
+
+/* no range check: only meaningful when isupper(r) holds */
+#define	tolower(r)	((r)-'A'+'a')
+
+/* -1, 0 or 1 according to the sign of v */
+#define	sgn(v)		((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))
+
+#define	WORDSIZ	4000	/* capacity, in runes, of entry[] and word[] */
+char	*filename = "#9/lib/words";	/* default dictionary; run through unsharp() in main */
+Biobuf	*dfile;		/* the dictionary being searched */
+Biobuf	bout;		/* buffered standard output */
+Biobuf	bin;		/* buffered standard input (interactive mode) */
+
+int	fold;		/* -f: map upper case to lower case in rcanon */
+int	direc;		/* -d: keep only letters, digits, blank and tab in rcanon */
+int	exact;		/* -x: print exact matches only */
+int	iflag;		/* -i, or no string argument: read strings from stdin */
+int	rev = 1;	/*-1 for reverse-ordered file, not implemented*/
+int	(*compare)(Rune*, Rune*);	/* acomp by default, ncomp with -n */
+Rune	tab = '\t';	/* rune that ends the key within a line (-t) */
+Rune	entry[WORDSIZ];	/* current dictionary line as read by getword */
+Rune	word[WORDSIZ];	/* rcanon() form of entry */
+Rune	key[50], orig[50];	/* orig: search string as given; key: its rcanon() form */
+/* indexed by r-0xc0; a 0 entry means the rune is left unchanged by rcanon */
+Rune	latin_fold_tab[] =
+{
+/*	Table to fold latin 1 characters to ASCII equivalents
+			based at Rune value 0xc0
+
+	 À    Á    Â    Ã    Ä    Å    Æ    Ç
+	 È    É    Ê    Ë    Ì    Í    Î    Ï
+	 Ð    Ñ    Ò    Ó    Ô    Õ    Ö    ×
+	 Ø    Ù    Ú    Û    Ü    Ý    Þ    ß
+	 à    á    â    ã    ä    å    æ    ç
+	 è    é    ê    ë    ì    í    î    ï
+	 ð    ñ    ò    ó    ô    õ    ö    ÷
+	 ø    ù    ú    û    ü    ý    þ    ÿ
+*/
+	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
+	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
+	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
+	'o', 'u', 'u', 'u', 'u', 'y',  0 ,  0 ,
+	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
+	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
+	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
+	'o', 'u', 'u', 'u', 'u', 'y',  0 , 'y',
+};
+
+int	locate(void);
+int	acomp(Rune*, Rune*);
+int	getword(Biobuf*, Rune *rp, int n);
+void	torune(char*, Rune*);
+void	rcanon(Rune*, Rune*);
+int	ncomp(Rune*, Rune*);
+
+/*
+ * look [-dfinx] [-t c] [string] [file]
+ *
+ * Without -i the first argument is the string to look up; when no
+ * string is given the program falls back to interactive mode and reads
+ * strings from standard input, one per line.  When no file is given
+ * the default dictionary is searched with -d and -f in effect.
+ * Exits with "not found" when a non-interactive lookup fails and with
+ * "no dictionary" when the file cannot be opened.
+ */
+void
+main(int argc, char *argv[])
+{
+	int n;
+	char *t;
+
+	filename = unsharp(filename);
+
+	Binit(&bin, 0, OREAD);
+	Binit(&bout, 1, OWRITE);
+	compare = acomp;
+	ARGBEGIN{
+	case 'd':
+		direc++;
+		break;
+	case 'f':
+		fold++;
+		break;
+	case 'i':
+		iflag++;
+		break;
+	case 'n':
+		compare = ncomp;
+		break;
+	case 't':
+		/*
+		 * ARGF() yields 0 when nothing follows -t; a tab rune of 0
+		 * never matches in rcanon, so the whole line is the key.
+		 */
+		t = ARGF();
+		if(t == 0)
+			tab = 0;
+		else
+			chartorune(&tab, t);
+		break;
+	case 'x':
+		exact++;
+		break;
+	default:
+		fprint(2, "%s: bad option %c\n", argv0, ARGC());
+		fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);
+		exits("usage");
+	} ARGEND
+	if(!iflag){
+		if(argc >= 1) {
+			/* torune does no bounds checking; refuse what cannot fit in orig */
+			if(utflen(argv[0]) >= sizeof(orig)/sizeof(orig[0])) {
+				fprint(2, "look: string too long\n");
+				exits("string too long");
+			}
+			torune(argv[0], orig);
+			argv++;
+			argc--;
+		} else
+			iflag++;
+	}
+	if(argc < 1) {
+		/* default dictionary is searched in -df order */
+		direc++;
+		fold++;
+	} else
+		filename = argv[0];
+	if (!iflag)
+		rcanon(orig, key);
+	dfile = Bopen(filename, OREAD);
+	if(dfile == 0) {
+		fprint(2, "look: can't open %s\n", filename);
+		exits("no dictionary");
+	}
+	if(!iflag)
+		if(!locate())
+			exits("not found");
+	do {
+		if(iflag) {
+			Bflush(&bout);
+			if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
+				exits(0);
+			rcanon(orig, key);
+			if(!locate())
+				continue;
+		}
+		/* locate() left the first candidate line in entry/word */
+		if (!exact || !acomp(word, key))
+			Bprint(&bout, "%S\n", entry);
+		/* keep printing following lines while the key still matches */
+		while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
+			rcanon(entry, word);
+			n = compare(key, word);
+			switch(n) {
+			case -1:
+				if(exact)
+					break;
+				/* fall through */
+			case 0:
+				/*
+				 * compare against the canonical key, as for
+				 * the first line above; word is canonical too
+				 */
+				if (!exact || !acomp(word, key))
+					Bprint(&bout, "%S\n", entry);
+				continue;
+			}
+			break;
+		}
+	} while(iflag);
+	exits(0);
+}
+
+/*
+ * Find the first line of dfile that key matches.
+ * A binary search over byte offsets narrows [bot, top]; a linear scan
+ * from bot then finds the line.  On success returns 1 with the line in
+ * entry, its canonical form in word, and dfile positioned just after
+ * it.  Returns 0 when no line matches.
+ */
+int
+locate(void)
+{
+	vlong top, bot, mid;
+	int c;
+	int n;
+
+	bot = 0;
+	top = Bseek(dfile, 0L, 2);	/* presumably whence 2 is end-of-file, so top is the file size */
+	for(;;) {
+		mid = (top+bot) / 2;
+		Bseek(dfile, mid, 0);
+		/* mid may fall inside a line: skip forward to the next line start */
+		do
+			c = Bgetrune(dfile);
+		while(c>=0 && c!='\n');
+		mid = Boffset(dfile);
+		if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
+			break;
+		rcanon(entry, word);
+		n = compare(key, word);
+		switch(n) {
+		case -2:
+		case -1:
+		case 0:
+			/* key sorts at or before this line: move the upper bound down */
+			if(top <= mid)
+				break;	/* no progress possible; leaves the switch, then the loop */
+			top = mid;
+			continue;
+		case 1:
+		case 2:
+			/* key sorts after this line: move the lower bound up */
+			bot = mid;
+			continue;
+		}
+		break;
+	}
+	/* bot is 0 or the start of a line; scan forward from there */
+	Bseek(dfile, bot, 0);
+	while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
+		rcanon(entry, word);
+		n = compare(key, word);
+		switch(n) {
+		case -2:
+			return 0;	/* already past where key would be */
+		case -1:
+			if(exact)
+				return 0;	/* prefix match is not enough with -x */
+		case 0:
+			return 1;
+		case 1:
+		case 2:
+			continue;
+		}
+	}
+	return 0;
+}
+
+/*
+ *	acomp(s, t) compares two zero-terminated rune strings and returns:
+ *		-2 if s strictly precedes t
+ *		-1 if s is a prefix of t
+ *		0 if s is the same as t
+ *		1 if t is a prefix of s
+ *		2 if t strictly precedes s
+ */
+
+int
+acomp(Rune *s, Rune *t)
+{
+	int a, b;
+
+	/* walk the common prefix */
+	while(*s == *t) {
+		if(*s == 0)
+			return 0;
+		s++;
+		t++;
+	}
+	a = *s;
+	b = *t;
+	if(a == 0)
+		return -1;
+	if(b == 0)
+		return 1;
+	return a < b ? -2 : 2;
+}
+
+/*
+ * Convert the UTF string old into runes stored at new, including the
+ * terminating zero rune.  The caller must supply a large enough buffer.
+ */
+void
+torune(char *old, Rune *new)
+{
+	for(;;) {
+		old += chartorune(new, old);
+		if(*new++ == 0)
+			break;
+	}
+}
+
+/*
+ * Copy the key part of old (everything before the first tab rune or
+ * the end of the string) into new in canonical form: Latin-1 letters
+ * are folded through latin_fold_tab, runes other than letters, digits,
+ * blank and tab are dropped when direc is set, and upper case is
+ * mapped to lower case when fold is set.  new is zero-terminated.
+ */
+void
+rcanon(Rune *old, Rune *new)
+{
+	Rune r, mapped;
+
+	for(; (r = *old) != 0 && r != tab; old++) {
+		if(islatin1(r)) {
+			mapped = latin_fold_tab[r-0xc0];
+			if(mapped)
+				r = mapped;
+		}
+		if(direc && !isalnum(r) && r != ' ' && r != '\t')
+			continue;
+		if(fold && isupper(r))
+			r = tolower(r);
+		*new++ = r;
+	}
+	*new = 0;
+}
+
+/*
+ * Numeric comparison of s and t: leading white space, an optional
+ * minus sign, a run of digits and an optional fraction after '.'.
+ * Returns 0 when the numbers are equal and otherwise a value of
+ * magnitude 2 (ssgn/tsgn are +-2*rev); with rev == 1 a negative result
+ * means s is numerically smaller than t.
+ */
+int
+ncomp(Rune *s, Rune *t)
+{
+	Rune *is, *it, *js, *jt;
+	int a, b;
+	int ssgn, tsgn;
+
+	while(isspace(*s))
+		s++;
+	while(isspace(*t))
+		t++;
+	/* a leading '-' flips the sign factor of that operand */
+	ssgn = tsgn = -2*rev;
+	if(*s == '-') {
+		s++;
+		ssgn = -ssgn;
+	}
+	if(*t == '-') {
+		t++;
+		tsgn = -tsgn;
+	}
+	/* is/it: one past the last integer digit of s/t */
+	for(is = s; isdigit(*is); is++)
+		;
+	for(it = t; isdigit(*it); it++)
+		;
+	js = is;
+	jt = it;
+	a = 0;
+	/*
+	 * same sign: compare the integer digits right to left over the
+	 * common length; a ends up as the most significant difference (t - s)
+	 */
+	if(ssgn == tsgn)
+		while(it>t && is>s)
+			if(b = *--it - *--is)
+				a = b;
+	/* any nonzero digit left over on one side decides the result */
+	while(is > s)
+		if(*--is != '0')
+			return -ssgn;
+	while(it > t)
+		if(*--it != '0')
+			return tsgn;
+	if(a)
+		return sgn(a)*ssgn;
+	/* integer parts are equal: continue with the fraction digits */
+	if(*(s=js) == '.')
+		s++;
+	if(*(t=jt) == '.')
+		t++;
+	if(ssgn == tsgn)
+		while(isdigit(*s) && isdigit(*t))
+			if(a = *t++ - *s++)
+				return sgn(a)*ssgn;
+	/* a longer fraction only matters if it has a nonzero digit */
+	while(isdigit(*s))
+		if(*s++ != '0')
+			return -ssgn;
+	while(isdigit(*t))
+		if(*t++ != '0')
+			return tsgn;
+	return 0;
+}
+
+/*
+ * Read one line from f into rp as runes, without the newline, and
+ * zero-terminate it.  n is the capacity of rp in runes.
+ * Returns 1 on success; returns 0 at end of input or, after printing a
+ * diagnostic, when the line and its terminator do not fit in n runes.
+ */
+int
+getword(Biobuf *f, Rune *rp, int n)
+{
+	long c;
+	int i;
+
+	for(i = 0; i < n; i++) {
+		c = Bgetrune(f);
+		if(c < 0)
+			return 0;
+		if(c == '\n') {
+			rp[i] = '\0';
+			return 1;
+		}
+		rp[i] = c;
+	}
+	fprint(2, "Look: word too long.  Bailing out.\n");
+	return 0;
+}
--- a/pbd/Makefile
+++ b/pbd/Makefile
@@ -0,0 +1,10 @@
+# pbd - pbd unix port from plan9
+# Depends on ../lib9
+
+TARG      = pbd
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
--- a/pbd/pbd.1
+++ b/pbd/pbd.1
--- a/pbd/pbd.c
+++ b/pbd/pbd.c
@@ -0,0 +1,19 @@
+#include <u.h>
+#include <libc.h>
+
+/*
+ * Write the last path element of the working directory to standard
+ * output with no trailing newline: "/" for the root itself, and "???"
+ * when getwd fails.
+ */
+void
+main(void)
+{
+	char wd[512], *base, *slash;
+
+	base = "???";
+	if(getwd(wd, sizeof wd) != nil){
+		slash = strrchr(wd, '/');
+		if(slash == nil)
+			base = wd;
+		else if(slash == wd && slash[1] == '\0')
+			base = slash;	/* the path is just "/" */
+		else
+			base = slash+1;
+	}
+	write(1, base, strlen(base));
+	exits(0);
+}
--- a/rc/Makefile
+++ b/rc/Makefile
@@ -46,7 +46,7 @@ uninstall:
 	@${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include -I../lib9 $*.c

 clean:
-	rm -f ${OFILES} ${TARG} y.tab.c y.tab.h
+	rm -f ${OFILES} ${TARG} y.tab.c y.tab.h x.tab.h

 ${TARG}: ${OFILES}
 	@echo LD ${TARG}
--- a/split/Makefile
+++ b/split/Makefile
@@ -0,0 +1,10 @@
+# split - split unix port from plan9
+# Depends on ../lib9
+
+TARG      = split
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
--- a/split/split.1
+++ b/split/split.1
@@ -0,0 +1,82 @@
+.TH SPLIT 1
+.CT 1 files
+.SH NAME
+split \- split a file into pieces
+.SH SYNOPSIS
+.B split
+[
+.I option ...
+]
+[
+.I file
+]
+.SH DESCRIPTION
+.I Split
+reads
+.I file
+(standard input by default)
+and writes it in pieces of 1000
+lines per output file.
+The names of the
+output files are
+.BR xaa ,
+.BR xab ,
+and so on to
+.BR xzz .
+The options are
+.TP
+.BI -n " n"
+Split into
+.IR n -line
+pieces.
+.TP
+.BI -l " n"
+Synonym for
+.B -n
+.IR n ,
+a nod to Unix's syntax.
+.TP
+.BI -e " expression"
+File divisions occur at each line
+that matches a regular
+.IR expression ;
+see 
+.IR regexp (7).
+Multiple
+.B -e
+options may appear.
+If a subexpression of
+.I expression
+is contained in parentheses
+.BR ( ... ) ,
+the output file name is the portion of the
+line which matches the subexpression.
+.TP
+.BI -f " stem"
+Use
+.I stem
+instead of
+.B x
+in output file names.
+.TP
+.BI -s " suffix"
+Append
+.I suffix
+to names identified under
+.BR -e .
+.TP
+.B -x
+Exclude the matched input line from the output file.
+.TP
+.B -i
+Ignore case in option
+.BR -e ;
+force output file names (excluding the suffix)
+to lower case.
+.SH SOURCE
+.B \*9/src/cmd/split.c
+.SH SEE ALSO
+.IR sed (1), 
+.IR awk (1),
+.IR grep (1),
+.IR regexp (7)
--- a/split/split.c
+++ b/split/split.c
@@ -0,0 +1,189 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ctype.h>
+#include <regexp.h>
+
+char	digit[] = "0123456789";	/* not referenced elsewhere in this file */
+char	*suffix = "";		/* -s: appended to names taken from a -e match */
+char	*stem = "x";		/* -f: prefix of generated names */
+char	suff[] = "aa";		/* next two-letter suffix; advanced by nextfile */
+char	name[200];		/* name of the output file handed to openf */
+Biobuf	bout;			/* buffer behind output; (re)initialized by openf */
+Biobuf	*output = &bout;
+
+extern int nextfile(void);
+extern int matchfile(Resub*);
+extern void openf(void);
+extern char *fold(char*,int);
+extern void usage(void);
+extern void badexp(void);
+
+/*
+ * split [-n num] [-l num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]
+ *
+ * Reads file (standard input when absent) line by line.  With -e a
+ * new output file is started at every line matching the expression;
+ * otherwise a new file is started every n lines (1000 by default).
+ */
+void
+main(int argc, char *argv[])
+{
+	Reprog *exp;
+	char *pattern = 0;
+	int n = 1000;
+	char *line;
+	int xflag = 0;
+	int iflag = 0;
+	Biobuf bin;
+	Biobuf *b = &bin;
+	char buf[256];
+
+	ARGBEGIN {
+	case 'l':
+	case 'n':
+		n=atoi(EARGF(usage()));
+		break;
+	case 'e':
+		pattern = strdup(EARGF(usage()));
+		break;
+	case 'f':
+		stem = strdup(EARGF(usage()));
+		break;
+	case 's':
+		suffix = strdup(EARGF(usage()));
+		break;
+	case 'x':
+		xflag++;
+		break;
+	case 'i':
+		iflag++;
+		break;
+	default:
+		usage();
+		break;
+
+	} ARGEND;
+
+	if(argc < 0 || argc > 1)
+		usage();
+
+	if(argc != 0) {
+		b = Bopen(argv[0], OREAD);
+		if(b == nil) {
+			fprint(2, "split: can't open %s: %r\n", argv[0]);
+			exits("open");
+		}
+	} else
+		Binit(b, 0, OREAD);
+
+	if(pattern) {
+		/* with -i both the pattern and each line are matched in folded form */
+		if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern)))
+			badexp();
+		while((line=Brdline(b,'\n')) != 0) {
+			Resub match[2];
+			memset(match, 0, sizeof match);
+			/* overwrite the newline so the line is a C string */
+			line[Blinelen(b)-1] = 0;
+			if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,match,2)) {
+				/* a match starts a new file; -x drops the matching line */
+				if(matchfile(match) && xflag)
+					continue;
+			} else if(output == 0)
+				/*
+				 * NOTE(review): output is initialized to &bout and
+				 * never set to 0 in this file, so this branch looks
+				 * unreachable -- confirm the intended handling of
+				 * lines that precede the first match.
+				 */
+				nextfile();	/* at most once */
+			Bwrite(output, line, Blinelen(b)-1);
+			Bputc(output, '\n');
+		}
+	} else {
+		/* start at n so that the first line opens the first file */
+		int linecnt = n;
+
+		while((line=Brdline(b,'\n')) != 0) {
+			if(++linecnt > n) {
+				nextfile();
+				linecnt = 1;
+			}
+			Bwrite(output, line, Blinelen(b));
+		}
+
+		/*
+		 * in case we didn't end with a newline, tack whatever's 
+		 * left onto the last file
+		 */
+		/* n is reused here as a byte count */
+		while((n = Bread(b, buf, sizeof(buf))) > 0)
+			Bwrite(output, buf, n);
+	}
+	if(b != nil)
+		Bterm(b);
+	exits(0);
+}
+
+/*
+ * Open the next generated output file, stem followed by a two-letter
+ * suffix running from aa to zz.  Once the suffixes are exhausted a
+ * diagnostic is printed (once) and no further file is opened.
+ * Returns nonzero while files can still be opened, 0 afterwards.
+ * Exits if the generated name would not fit in name[].
+ */
+int
+nextfile(void)
+{
+	static int canopen = 1;
+	if(suff[0] > 'z') {
+		if(canopen)
+			fprint(2, "split: file %szz not split\n",stem);
+		canopen = 0;
+	} else {
+		/* stem comes from the command line: make sure it fits */
+		if(strlen(stem) + strlen(suff) >= sizeof name) {
+			fprint(2, "split: file name %s%s too long\n", stem, suff);
+			exits("name");
+		}
+		strcpy(name, stem);
+		strcat(name, suff);
+		if(++suff[1] > 'z') 
+			suff[1] = 'a', ++suff[0];
+		openf();
+	}
+	return canopen;
+}
+
+/*
+ * Start a new output file for a line that matched the -e expression.
+ * If the expression had a parenthesized subexpression (match[1]), the
+ * file is named after the matched text plus suffix and 1 is returned;
+ * otherwise the result of nextfile() is returned.
+ * Exits if the resulting name would not fit in name[].
+ */
+int
+matchfile(Resub *match)
+{
+	if(match[1].s.sp) {
+		int len = match[1].e.ep - match[1].s.sp;
+		/* the matched text comes from the input: make sure it fits */
+		if(len < 0 || len + strlen(suffix) >= sizeof name) {
+			fprint(2, "split: file name too long\n");
+			exits("name");
+		}
+		strncpy(name, match[1].s.sp, len);
+		strcpy(name+len, suffix);	/* also terminates name */
+		openf();
+		return 1;
+	} 
+	return nextfile();
+}
+
+/*
+ * Flush and terminate the current output buffer, close the previous
+ * output descriptor if one was opened here, then create the file
+ * called name and attach output to it.  Exits with "create" when the
+ * file cannot be created.
+ */
+void
+openf(void)
+{
+	static int fd = 0;
+	Bflush(output);
+	Bterm(output);
+	if(fd > 0)
+		close(fd);
+	fd = create(name,OWRITE,0666);
+	if(fd < 0) {
+		/* the message used to be prefixed "grep:" */
+		fprint(2, "split: can't create %s: %r\n", name);
+		exits("create");
+	}
+	Binit(output, fd, OWRITE);
+}
+
+/*
+ * Return a lower-cased copy of the string s.  n is the length of s;
+ * it sizes the static buffer that holds the copy.  The buffer is
+ * reused and overwritten by the next call.
+ * Exits if the buffer cannot be grown.
+ */
+char *
+fold(char *s, int n)
+{
+	static char *fline;
+	static int linesize = 0;
+	char *t;
+
+	if(linesize < n+1){
+		/* keep the old pointer until the new one is known to be good */
+		t = realloc(fline,n+1);
+		if(t == nil){
+			fprint(2, "split: out of memory\n");
+			exits("malloc");
+		}
+		fline = t;
+		linesize = n+1;
+	}
+	for(t=fline; *t++ = tolower((uchar)*s++); )
+		continue;
+		/* we assume the 'A'-'Z' only appear as themselves
+		 * in a utf encoding.
+		 */
+	return fline;
+}
+
+/*
+ * Print the usage message on standard error and exit with "usage".
+ * -l is listed because main accepts it as a synonym for -n.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: split [-n num] [-l num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n");
+	exits("usage");
+}
+
+/*
+ * Report that the -e expression did not compile and exit; called from
+ * main when regcomp returns 0.
+ */
+void
+badexp(void)
+{
+	fprint(2, "split: bad regular expression\n");
+	exits("bad regular expression");
+}
--- a/strings/Makefile
+++ b/strings/Makefile
@@ -0,0 +1,10 @@
+# strings - strings unix port from plan9
+# Depends on ../lib9
+
+TARG      = strings
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
--- a/strings/strings.1
+++ b/strings/strings.1
@@ -0,0 +1,28 @@
+.TH STRINGS 1
+.SH NAME
+strings \- extract printable strings
+.SH SYNOPSIS
+.B strings
+[
+.I file ...
+]
+.SH DESCRIPTION
+.I Strings
+finds and prints strings containing 6 or more
+consecutive printable UTF-encoded characters
+in a (typically) binary file, default
+standard input.
+Printable characters are taken to be
+.SM ASCII
+characters from blank through tilde (hexadecimal 20 through 7E), inclusive,
+and
+all other characters from value 00A0 to FFFF.
+Strings reports
+the decimal offset within the file at which the string starts and the text
+of the string. If the string is longer than 70 runes the line is
+terminated by three dots and the printing is resumed on the next
+line with the offset of the continuation line.
+.SH SOURCE
+.B \*9/src/cmd/strings.c
+.SH SEE ALSO
+.IR nm (1)
--- a/strings/strings.c
+++ b/strings/strings.c
@@ -0,0 +1,90 @@
+#include	<u.h>
+#include 	<libc.h>
+#include	<bio.h>
+
+Biobuf	*fin;
+Biobuf	fout;
+
+#define	MINSPAN		6		/* Min characters in string */
+
+#define BUFSIZE		70
+
+void stringit(char *);
+#undef isprint
+#define isprint risprint
+int isprint(Rune);
+
+/*
+ * strings [file ...]
+ *
+ * Runs stringit on each named file, or on /dev/stdin when no file is
+ * given.  With more than one file each is preceded by a "name:" line.
+ */
+void
+main(int argc, char **argv)
+{
+	int i;
+
+	Binit(&fout, 1, OWRITE);
+	if(argc < 2) {
+		stringit("/dev/stdin");
+		exits(0);
+	}
+
+	for(i = 1; i < argc; i++) {
+		/*
+		 * write the header through fout, like the strings
+		 * themselves, so the two stay in order in the output
+		 */
+		if(argc > 2)
+			Bprint(&fout, "%s:\n", argv[i]);
+
+		stringit(argv[i]);
+	}
+
+	exits(0);
+}
+
+/*
+ * Scan the file named str and print, on fout, every run of at least
+ * MINSPAN printable runes together with the offset at which the run
+ * starts.  A run that fills the buffer (BUFSIZE-1 runes) is printed
+ * followed by " ..." and continues on a new output line with its own
+ * offset.  A file that cannot be opened is reported with perror and
+ * skipped.
+ */
+void
+stringit(char *str)
+{
+	long posn, start;
+	int cnt = 0;
+	long c;
+
+	Rune buf[BUFSIZE];
+
+	if ((fin = Bopen(str, OREAD)) == 0) {
+		perror("open");
+		return;
+	}
+
+	start = 0;
+	posn = Boffset(fin);	/* offset of the rune about to be read */
+	while((c = Bgetrune(fin)) >= 0) {
+		if(isprint(c)) {
+			/*
+			 * cnt == 0 marks the first rune of a run; testing
+			 * start == 0 instead would misreport a run that
+			 * begins at offset 0
+			 */
+			if(cnt == 0)
+				start = posn;
+			buf[cnt++] = c;
+			if(cnt == BUFSIZE-1) {
+				buf[cnt] = 0;
+				Bprint(&fout, "%8ld: %S ...\n", start, buf);
+				cnt = 0;
+			}
+		} else {
+			if(cnt >= MINSPAN) {
+				buf[cnt] = 0;
+				Bprint(&fout, "%8ld: %S\n", start, buf);
+			}
+			cnt = 0;
+		}
+		posn = Boffset(fin);
+	}
+
+	/* input ended in the middle of a run */
+	if(cnt >= MINSPAN){
+		buf[cnt] = 0;
+		Bprint(&fout, "%8ld: %S\n", start, buf);
+	}
+	Bterm(fin);
+}
+
+/*
+ * Return 1 if r counts as printable here: blank through tilde
+ * (0x20..0x7E) or any rune above 0xA0; otherwise 0.
+ */
+int
+isprint(Rune r)
+{
+	return (r >= ' ' && r <0x7f) || r > 0xA0;
+}
--- a/unicode/Makefile
+++ b/unicode/Makefile
@@ -0,0 +1,10 @@
+# unicode - unicode unix port from plan9
+# Depends on ../lib9
+
+TARG      = unicode
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
--- a/unicode/unicode.1
+++ b/unicode/unicode.1
--- a/unicode/unicode.c
+++ b/unicode/unicode.c
@@ -0,0 +1,122 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+char	usage[] = "unicode { [-t] hex hex ... | hexmin-hexmax ... | [-n] char ... }";
+char	hex[] = "0123456789abcdefABCDEF";
+int	numout = 0;
+int	text = 0;
+char	*err;
+Biobuf	bout;
+
+char	*range(char*[]);
+char	*nums(char*[]);
+char	*chars(char*[]);
+
+/*
+ * Parse -n and -t, then dispatch on the first argument: a range
+ * (contains '-', unless -n), characters to be printed as numbers (-n,
+ * or the argument does not start with a hex digit), or hex values to
+ * be printed as characters.  The string returned by the chosen routine
+ * is passed to exits().
+ */
+void
+main(int argc, char *argv[])
+{
+	char *status;
+
+	ARGBEGIN{
+	case 'n':
+		numout = 1;
+		break;
+	case 't':
+		text = 1;
+		break;
+	}ARGEND
+	Binit(&bout, 1, OWRITE);
+	if(argc == 0){
+		fprint(2, "usage: %s\n", usage);
+		exits("usage");
+	}
+	if(!numout && utfrune(argv[0], '-'))
+		status = range(argv);
+	else if(numout || strchr(hex, argv[0][0])==0)
+		status = nums(argv);
+	else
+		status = chars(argv);
+	exits(status);
+}
+
+/*
+ * For each argument of the form hexmin-hexmax, print every value in
+ * the range as "hex rune", eight per line separated by tabs.
+ * Returns 0 on success; on a malformed or out-of-range argument prints
+ * a diagnostic and returns "bad range".
+ */
+char*
+range(char *argv[])
+{
+	char *p;
+	int lo, hi;
+	int col;
+
+	for(; *argv; argv++){
+		p = *argv;
+		if(strchr(hex, p[0]) == 0)
+			goto bad;
+		lo = strtoul(p, &p, 16);
+		if(lo<0 || lo>Runemax || *p!='-')
+			goto bad;
+		p++;
+		if(strchr(hex, *p) == 0)
+			goto bad;
+		hi = strtoul(p, &p, 16);
+		if(hi<0 || hi>Runemax || hi<lo || *p!=0)
+			goto bad;
+		/* col counts the entries printed so far for this argument */
+		for(col = 1; ; col++, lo++){
+			Bprint(&bout, "%.4x %C", lo, lo);
+			if(lo==hi || (col&7)==0)
+				Bprint(&bout, "\n");
+			else
+				Bprint(&bout, "\t");
+			if(lo >= hi)
+				break;
+		}
+	}
+	return 0;
+
+bad:
+	fprint(2, "unicode: bad range %s\n", *argv);
+	return "bad range";
+}
+
+/*
+ * Print, one per line in hex, the value of every rune in every
+ * argument.  Returns 0 on success; on malformed UTF prints a
+ * diagnostic and returns "bad utf".
+ */
+char*
+nums(char *argv[])
+{
+	char *q;
+	Rune r;
+	int w;
+
+	while(*argv){
+		q = *argv;
+		while(*q){
+			w = chartorune(&r, q);
+			/*
+			 * Runeerror decoded from a single byte means a bad
+			 * sequence; a genuine U+FFFD in the input is longer.
+			 * (The old test compared against 0x80, which is no
+			 * longer the value of Runeerror.)
+			 */
+			if(r==Runeerror && w==1){
+				fprint(2, "unicode: invalid utf string %s\n", *argv);
+				return "bad utf";
+			}
+			Bprint(&bout, "%.4x\n", r);
+			q += w;
+		}
+		argv++;
+	}
+	return 0;
+}
+
+/*
+ * Print the rune for each hex argument, each followed by a newline
+ * unless text (-t) is set.  Returns 0 on success; on a malformed or
+ * out-of-range value prints a diagnostic and returns "bad char".
+ */
+char*
+chars(char *argv[])
+{
+	char *end;
+	int code;
+
+	for(; *argv; argv++){
+		if(strchr(hex, (*argv)[0]) == 0)
+			goto bad;
+		code = strtoul(*argv, &end, 16);
+		if(code<0 || code>Runemax || *end!=0)
+			goto bad;
+		Bprint(&bout, "%C", code);
+		if(!text)
+			Bprint(&bout, "\n");
+	}
+	return 0;
+
+bad:
+	fprint(2, "unicode: bad unicode value %s\n", *argv);
+	return "bad char";
+}
--- a/unutf/Makefile
+++ b/unutf/Makefile
@@ -0,0 +1,10 @@
+# unutf - unutf unix port from plan9
+# Depends on ../lib9
+
+TARG      = unutf
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
--- a/unutf/unutf.1
+++ b/unutf/unutf.1
--- a/unutf/unutf.c
+++ b/unutf/unutf.c
@@ -0,0 +1,20 @@
+/*
+ * stupid little program to pipe unicode chars through 
+ * when converting to non-utf compilers.
+ */
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+Biobuf bin;
+
+/*
+ * Read runes from standard input until Bgetrune reports a negative
+ * value and print each one on its own line with the "0x%ux" format.
+ */
+void
+main(void)
+{
+	int c;
+
+	Binit(&bin, 0, OREAD);
+	for(;;){
+		c = Bgetrune(&bin);
+		if(c < 0)
+			break;
+		print("0x%ux\n", c);
+	}
+	exits(0);
+}