added commands as discussed with Uriel yesterday

This commit is contained in:
Anselm R Garbe
2010-05-28 11:30:17 +01:00
parent 85bacddf77
commit fa62640154
41 changed files with 4238 additions and 16 deletions

View File

@@ -2,9 +2,56 @@
include config.mk
SUBDIRS = lib9 yacc awk basename bc cal cat cleanname date dc du dd echo ed \
factor fortune fmt freq getflags grep hoc ls mk mkdir mtime primes \
rc read sha1sum sed seq sleep sort tail tee test touch tr troff uniq
SUBDIRS = lib9\
yacc\
ascii\
awk\
basename\
bc\
cal\
cat\
cleanname\
cmp\
date\
dc\
du\
dd\
diff\
echo\
ed\
factor\
fortune\
fmt\
freq\
getflags\
grep\
hoc\
join\
look\
ls\
mk\
mkdir\
mtime\
pbd\
primes\
rc\
read\
sha1sum\
sed\
seq\
sleep\
sort\
split\
strings\
tail\
tee\
test\
touch\
tr\
troff\
unicode\
uniq\
unutf\
all:
@echo 9base build options:

11
TODO
View File

@@ -1,11 +0,0 @@
12:13 < uriel> garbeam: add dd and diff too
12:13 < uriel> and split
12:14 < uriel> (and join)
12:15 < uriel> and unutf (which I just noticed, seems to be undocumented, but seems quite useful too)
12:15 < uriel> and tcs
12:16 < uriel> and strings
12:18 < uriel> oh, oh, I'm finding some great bits:
12:18 < uriel> look(1), ascii(1) and unicode(1)
12:19 < uriel> ok, and cmp(1) is missing too
12:23 < uriel> hah! plan9/src/cmd/index/ is really interesting (but not worth including)
12:26 < uriel> oh! pbd! what a wonderful discovery, we certainly should add it too

10
ascii/Makefile Normal file
View File

@@ -0,0 +1,10 @@
# ascii - ascii unix port from plan9
# Depends on ../lib9
TARG = ascii
include ../std.mk
pre-uninstall:
post-install:

160
ascii/ascii.1 Normal file
View File

@@ -0,0 +1,160 @@
.TH ASCII 1
.SH NAME
ascii, unicode \- interpret ASCII, Unicode characters
.SH SYNOPSIS
.B ascii
[
.B -8
]
[
.BI -oxdb n
]
[
.B -nct
]
[
.I text
]
.PP
.B unicode
[
.B -nt
]
.IB hexmin - hexmax
.PP
.B unicode
[
.B -t
]
.I hex
[
\&...
]
.PP
.B unicode
[
.B -n
]
.I characters
.PP
.B look
.I hex
.B \*9/lib/unicode
.SH DESCRIPTION
.I Ascii
prints the
.SM ASCII
values corresponding to characters and
.I vice
.IR versa ;
under the
.B -8
option, the
.SM ISO
Latin-1 extensions (codes 0200-0377) are included.
The values are interpreted in a settable numeric base;
.B -o
specifies octal,
.B -d
decimal,
.B -x
hexadecimal (the default), and
.BI -b n
base
.IR n .
.PP
With no arguments,
.I ascii
prints a table of the character set in the specified base.
Characters of
.I text
are converted to their
.SM ASCII
values, one per line. If, however, the first
.I text
argument is a valid number in the specified base, conversion
goes the opposite way.
Control characters are printed as two- or three-character mnemonics.
Other options are:
.TP
.B -n
Force numeric output.
.TP
.B -c
Force character output.
.TP
.B -t
Convert from numbers to running text; do not interpret
control characters or insert newlines.
.PP
.I Unicode
is similar; it converts between
.SM UTF
and character values from the Unicode Standard (see
.IR utf (7)).
If given a range of hexadecimal numbers,
.I unicode
prints a table of the specified Unicode characters \(em their values and
.SM UTF
representations.
Otherwise it translates from
.SM UTF
to numeric value or vice versa,
depending on the appearance of the supplied text;
the
.B -n
option forces numeric output to avoid ambiguity with numeric characters.
If converting to
.SM UTF ,
the characters are printed one per line unless the
.B -t
flag is set, in which case the output is a single string
containing only the specified characters.
Unlike
.IR ascii ,
.I unicode
treats no characters specially.
.PP
The output of
.I ascii
and
.I unicode
may be unhelpful if the characters printed are not available in the current font.
.PP
The file
.B \*9/lib/unicode
contains a
table of characters and descriptions, sorted in hexadecimal order,
suitable for
.IR look (1)
on the lower case
.I hex
values of characters.
.SH EXAMPLES
.TP
.B "ascii -d"
Print the
.SM ASCII
table base 10.
.TP
.B "unicode p"
Print the hex value of `p'.
.TP
.B "unicode 2200-22f1"
Print a table of miscellaneous mathematical symbols.
.TP
.B "look 039 \*9/lib/unicode"
See the start of the Greek alphabet's encoding in the Unicode Standard.
.SH FILES
.TP
.B \*9/lib/unicode
table of characters and descriptions.
.SH SOURCE
.B \*9/src/cmd/ascii.c
.br
.B \*9/src/cmd/unicode.c
.SH "SEE ALSO"
.IR look (1),
.IR tcs (1),
.IR utf (7),
.IR font (7)

181
ascii/ascii.c Normal file
View File

@@ -0,0 +1,181 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
#define MAXBASE 36
void usage(void);
void put(int);
void putn(int, int);
void puttext(char *);
void putnum(char *);
int btoi(char *);
int value(int, int);
int isnum(char *);
/*
 * Printable form of every 8-bit character code, indexed by code.
 * Control characters use two- or three-letter mnemonics, printable
 * characters are padded with a space on either side, and codes
 * 0x80-0xa0 are shown as "xNN".  Entries 0x80 and above are only
 * listed in the table when -8 raises nchars to 256.
 */
char *str[256]={
	"nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
	"bs ", "ht ", "nl ", "vt ", "np ", "cr ", "so ", "si ",
	"dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
	"can", "em ", "sub", "esc", "fs ", "gs ", "rs ", "us ",
	"sp ", " ! ", " \" ", " # ", " $ ", " % ", " & ", " ' ",
	" ( ", " ) ", " * ", " + ", " , ", " - ", " . ", " / ",
	" 0 ", " 1 ", " 2 ", " 3 ", " 4 ", " 5 ", " 6 ", " 7 ",
	" 8 ", " 9 ", " : ", " ; ", " < ", " = ", " > ", " ? ",
	" @ ", " A ", " B ", " C ", " D ", " E ", " F ", " G ",
	" H ", " I ", " J ", " K ", " L ", " M ", " N ", " O ",
	" P ", " Q ", " R ", " S ", " T ", " U ", " V ", " W ",
	" X ", " Y ", " Z ", " [ ", " \\ ", " ] ", " ^ ", " _ ",
	" ` ", " a ", " b ", " c ", " d ", " e ", " f ", " g ",
	" h ", " i ", " j ", " k ", " l ", " m ", " n ", " o ",
	" p ", " q ", " r ", " s ", " t ", " u ", " v ", " w ",
	" x ", " y ", " z ", " { ", " | ", " } ", " ~ ", "del",
	"x80", "x81", "x82", "x83", "x84", "x85", "x86", "x87",
	"x88", "x89", "x8a", "x8b", "x8c", "x8d", "x8e", "x8f",
	"x90", "x91", "x92", "x93", "x94", "x95", "x96", "x97",
	"x98", "x99", "x9a", "x9b", "x9c", "x9d", "x9e", "x9f",
	"xa0", " ¡ ", " ¢ ", " £ ", " ¤ ", " ¥ ", " ¦ ", " § ",
	" ¨ ", " © ", " ª ", " « ", " ¬ ", " ­ ", " ® ", " ¯ ",
	/* 0xb6 is the pilcrow; it was an empty string, which broke the column layout */
	" ° ", " ± ", " ² ", " ³ ", " ´ ", " µ ", " ¶ ", " · ",
	" ¸ ", " ¹ ", " º ", " » ", " ¼ ", " ½ ", " ¾ ", " ¿ ",
	" À ", " Á ", " Â ", " Ã ", " Ä ", " Å ", " Æ ", " Ç ",
	" È ", " É ", " Ê ", " Ë ", " Ì ", " Í ", " Î ", " Ï ",
	" Ð ", " Ñ ", " Ò ", " Ó ", " Ô ", " Õ ", " Ö ", " × ",
	" Ø ", " Ù ", " Ú ", " Û ", " Ü ", " Ý ", " Þ ", " ß ",
	" à ", " á ", " â ", " ã ", " ä ", " å ", " æ ", " ç ",
	" è ", " é ", " ê ", " ë ", " ì ", " í ", " î ", " ï ",
	" ð ", " ñ ", " ò ", " ó ", " ô ", " õ ", " ö ", " ÷ ",
	" ø ", " ù ", " ú ", " û ", " ü ", " ý ", " þ ", " ÿ "
};
char Ncol[]={
0,0,7,5,4,4,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
};
int nchars=128;
int base=16;
int ncol;
int text=1;
int strip=0;
Biobuf bin;
/*
 * ascii: with no arguments print the character table in the selected
 * base; otherwise convert each argument between characters and their
 * numeric codes.
 *
 * The global `text' selects the direction of the conversion:
 *   1  undecided (initial value): decided from the first argument,
 *   0  set by -n: print the numeric code of every character (putnum),
 *   2  set by -c or -t: treat arguments as numbers and print the
 *      corresponding character (puttext).
 */
void
main(int argc, char **argv)
{
int i;
/* all output goes through a Biobuf attached to file descriptor 1 */
Binit(&bin, 1, OWRITE);
ARGBEGIN{
case '8':
/* include codes 0200-0377 in the table */
nchars=256; break;
case 'x':
base=16; break;
case 'o':
base=8; break;
case 'd':
base=10; break;
case 'b':
/* arbitrary base; must index both dig[] and Ncol[] */
base=strtoul(EARGF(usage()), 0, 0);
if(base<2||base>MAXBASE)
usage();
break;
case 'n':
text=0; break;
case 't':
/* -t implies -c and makes puttext emit the raw byte */
strip=1;
/* fall through */
case 'c':
text=2; break;
default:
usage();
}ARGEND
/* number of digits printed per code in this base */
ncol=Ncol[base];
if(argc==0){
/* table mode: eight cells per row, each row closed with "|" */
for(i=0;i<nchars;i++){
put(i);
if((i&7)==7)
Bprint(&bin, "|\n");
}
}else{
/* no explicit direction: numbers if the first argument parses in `base' */
if(text==1)
text=isnum(argv[0]);
while(argc--)
if(text)
puttext(*argv++);
else
putnum(*argv++);
}
Bputc(&bin, '\n');
exits(0);
}
/* Print the option summary on file descriptor 2 and exit with status "usage". */
void
usage(void)
{
fprint(2, "usage: %s [-8] [-xod | -b8] [-ncst] [--] [text]\n", argv0);
exits("usage");
}
/*
 * Write one table cell for character code i: a leading '|', the code
 * in the current base (ncol digits), a space and the entry from str[].
 */
void
put(int i)
{
	char *label;

	label = str[i];
	Bputc(&bin, '|');
	putn(i, ncol);
	Bprint(&bin, " %s", label);
}
char dig[]="0123456789abcdefghijklmnopqrstuvwxyz";
/*
 * Print n in the current base using exactly ndig digits, most
 * significant first (zero padded, high digits dropped if n is too big).
 * The recursion emits the higher-order digits before the current one.
 */
void
putn(int n, int ndig)
{
	if(ndig != 0){
		putn(n/base, ndig-1);
		Bputc(&bin, dig[n%base]);
	}
}
/*
 * Interpret s as a number in the current base and print the character
 * it denotes: the raw byte under -t (strip), otherwise its str[] entry
 * followed by a newline.  Only the low eight bits of the value are used.
 */
void
puttext(char *s)
{
	int code;

	code = btoi(s) & 0377;
	if(!strip){
		Bprint(&bin, "%s\n", str[code]);
		return;
	}
	Bputc(&bin, code);
}
/*
 * Print the numeric code of every byte of s, one per line, in the
 * current base.  Each byte is masked to eight bits before printing.
 */
void
putnum(char *s)
{
	for(; *s != '\0'; s++){
		putn(*s & 0377, ncol);
		Bputc(&bin, '\n');
	}
}
/*
 * Convert the digit string s, written in the current base, to an int.
 * value() is called with f == 0, so an invalid digit terminates the
 * program instead of returning.
 */
int
btoi(char *s)
{
	int acc;

	for(acc = 0; *s != '\0'; s++)
		acc = acc*base + value(*s, 0);
	return acc;
}
/*
 * Return the numeric value of digit character c in the current base.
 * Only the first `base' characters of dig[] are accepted.  For an
 * invalid digit: return -1 when f is non-zero, otherwise report the
 * character on file descriptor 2 and exit with status "bad".
 */
int
value(int c, int f)
{
	int i;

	for(i = 0; i < base; i++)
		if(dig[i] == c)
			return i;
	if(!f){
		fprint(2, "%s: bad input char %c\n", argv0, c);
		exits("bad");
		return 0;	/* to keep ken happy */
	}
	return -1;
}
/*
 * Report whether every character of s is a valid digit in the current
 * base (1) or not (0).  The empty string counts as a number.
 */
int
isnum(char *s)
{
	for(; *s != '\0'; s++)
		if(value(*s, 1) == -1)
			return 0;
	return 1;
}

10
cmp/Makefile Normal file
View File

@@ -0,0 +1,10 @@
# cmp - cmp unix port from plan9
# Depends on ../lib9
TARG = cmp
include ../std.mk
pre-uninstall:
post-install:

57
cmp/cmp.1 Normal file
View File

@@ -0,0 +1,57 @@
.TH CMP 1
.SH NAME
cmp \- compare two files
.SH SYNOPSIS
.B cmp
[
.B -lsL
]
.I file1 file2
[
.I offset1
[
.I offset2
]
]
.SH DESCRIPTION
The two files are
compared.
A diagnostic results if the contents differ, otherwise
there is no output.
.PP
The options are:
.TP
.B l
Print the byte number (decimal) and the
differing bytes (hexadecimal) for each difference.
.TP
.B s
Print nothing for differing files,
but set the exit status.
.TP
.B L
Print the line number of the first differing byte.
.PP
If offsets are given,
comparison starts at the designated byte position
of the corresponding file.
Offsets that begin with
.B 0x
are hexadecimal;
with
.BR 0 ,
octal; with anything else, decimal.
.SH SOURCE
.B \*9/src/cmd/cmp.c
.SH "SEE ALSO"
.IR diff (1)
.SH DIAGNOSTICS
If a file is inaccessible or missing, the exit status is
.LR open .
If the files are the same, the exit status is empty (true).
If they are the same except that one is longer than the other, the exit status is
.LR EOF .
Otherwise
.I cmp
reports the position of the first disagreeing byte and the exit status is
.LR differ .

112
cmp/cmp.c Normal file
View File

@@ -0,0 +1,112 @@
#include <u.h>
#include <libc.h>
#define BUF 65536
int sflag = 0;
int lflag = 0;
int Lflag = 0;
static void usage(void);
/*
 * cmp: compare two files byte by byte.
 *
 * Options: -s print nothing, only set the exit status; -l list every
 * differing byte; -L also report the line number of the first
 * difference.  Optional offset arguments seek each file before the
 * comparison starts.
 *
 * Exit status: empty when the files match, "differ" at the first
 * difference (without -l), "EOF" when one file is a prefix of the
 * other, "open"/"seek N"/"read" on I/O failure.
 *
 * Each file has its own buffer; [b1s,b1e) and [b2s,b2e) are the bytes
 * read but not yet compared.  A buffer is refilled only when it is
 * empty, so the two windows may have different lengths and only the
 * common prefix of n bytes is compared per iteration.
 */
void
main(int argc, char *argv[])
{
	int n, i;
	uchar *p, *q;
	uchar buf1[BUF], buf2[BUF];
	int f1, f2;
	vlong nc = 1, o, l = 1;
	char *name1, *name2;
	uchar *b1s, *b1e, *b2s, *b2e;

	ARGBEGIN{
	case 's':	sflag = 1; break;
	case 'l':	lflag = 1; break;
	case 'L':	Lflag = 1; break;
	default:	usage();
	}ARGEND
	if(argc < 2)
		usage();
	if((f1 = open(name1 = *argv++, OREAD)) == -1){
		if(!sflag) perror(name1);
		exits("open");
	}
	if((f2 = open(name2 = *argv++, OREAD)) == -1){
		if(!sflag) perror(name2);
		exits("open");
	}
	if(*argv){
		o = strtoll(*argv++, 0, 0);
		if(seek(f1, o, 0) < 0){
			if(!sflag) perror("cmp: seek by offset1");
			exits("seek 1");
		}
	}
	if(*argv){
		o = strtoll(*argv++, 0, 0);
		if(seek(f2, o, 0) < 0){
			if(!sflag) perror("cmp: seek by offset2");
			exits("seek 2");
		}
	}
	if(*argv)
		usage();
	b1s = b1e = buf1;
	b2s = b2e = buf2;
	for(;;){
		if(b1s >= b1e){
			if(b1s >= &buf1[BUF])
				b1s = buf1;
			n = read(f1, b1s, &buf1[BUF] - b1s);
			/* a failed read used to be mistaken for end of file */
			if(n < 0){
				if(!sflag) perror(name1);
				exits("read");
			}
			b1e = b1s + n;
		}
		if(b2s >= b2e){
			if(b2s >= &buf2[BUF])
				b2s = buf2;
			n = read(f2, b2s, &buf2[BUF] - b2s);
			if(n < 0){
				if(!sflag) perror(name2);
				exits("read");
			}
			b2e = b2s + n;
		}
		/* compare only as many bytes as both windows hold */
		n = b2e - b2s;
		if(n > b1e - b1s)
			n = b1e - b1s;
		if(n <= 0)
			break;
		if(memcmp((void *)b1s, (void *)b2s, n) != 0){
			if(sflag)
				exits("differ");
			for(p = b1s, q = b2s, i = 0; i < n; p++, q++, i++) {
				if(*p == '\n')
					l++;
				if(*p != *q){
					if(!lflag){
						print("%s %s differ: char %lld",
							name1, name2, nc+i);
						print(Lflag?" line %lld\n":"\n", l);
						exits("differ");
					}
					print("%6lld 0x%.2x 0x%.2x\n", nc+i, *p, *q);
				}
			}
		}
		/*
		 * Count newlines only in the n bytes consumed this round.
		 * Scanning up to b1e would count the unconsumed tail again
		 * on the next iteration and inflate the line number.
		 */
		if(Lflag)
			for(p = b1s; p < b1s + n;)
				if(*p++ == '\n')
					l++;
		nc += n;
		b1s += n;
		b2s += n;
	}
	/* both windows empty: same length; otherwise the shorter file hit EOF */
	if(b1e - b1s == b2e - b2s)
		exits((char *)0);
	if(!sflag)
		print("EOF on %s\n", (b1e - b1s > b2e - b2s)? name2 : name1);
	exits("EOF");
}
/*
 * Print the synopsis and exit with status "usage".
 * NOTE(review): this uses print() rather than fprint(2, ...), so the
 * message presumably goes to standard output - confirm this is intended.
 */
static void
usage(void)
{
print("Usage: cmp [-lsL] file1 file2 [offset1 [offset2] ]\n");
exits("usage");
}

10
dd/Makefile Normal file
View File

@@ -0,0 +1,10 @@
# dd - dd unix port from plan9
# Depends on ../lib9
TARG = dd
include ../std.mk
pre-uninstall:
post-install:

0
dd/dd.1 Normal file
View File

660
dd/dd.c Normal file
View File

@@ -0,0 +1,660 @@
#include <u.h>
#include <libc.h>
#define BIG 2147483647
#define LCASE (1<<0)
#define UCASE (1<<1)
#define SWAB (1<<2)
#define NERR (1<<3)
#define SYNC (1<<4)
int cflag;
int fflag;
char *string;
char *ifile;
char *ofile;
char *ibuf;
char *obuf;
vlong skip;
vlong oseekn;
vlong iseekn;
vlong count;
long files = 1;
long ibs = 512;
long obs = 512;
long bs;
long cbs;
long ibc;
long obc;
long cbc;
long nifr;
long nipr;
long nofr;
long nopr;
long ntrunc;
int dotrunc = 1;
int ibf;
int obf;
char *op;
int nspace;
uchar etoa[256];
uchar atoe[256];
uchar atoibm[256];
void flsh(void);
int match(char *s);
vlong number(long big);
void cnull(int cc);
void null(int c);
void ascii(int cc);
void unblock(int cc);
void ebcdic(int cc);
void ibm(int cc);
void block(int cc);
void term(void);
void stats(void);
#define iskey(s) ((key[0] == '-') && (strcmp(key+1, s) == 0))
/*
 * dd: copy input to output in blocks, optionally converting the data.
 *
 * Arguments come in pairs "-key value" (see the iskey macro: the key
 * must start with '-').  After the options are parsed the files are
 * opened, the buffers allocated with sbrk, and the copy runs as a
 * goto-driven loop: `ibc' is the number of unconsumed input bytes and
 * `ip' the read position inside ibuf.  When conversion is a no-op and
 * ibs == obs (fflag), input and output share one buffer and whole
 * blocks are written without the per-byte path.
 */
void
main(int argc, char *argv[])
{
void (*conv)(int);
char *ip;
char *key;
int a, c;
conv = null;
/* c is advanced twice per iteration: once for the key, once for its value */
for(c=1; c<argc; c++) {
key = argv[c++];
if(c >= argc){
fprint(2, "dd: arg %s needs a value\n", key);
exits("arg");
}
/* number() and match() read their input from the global `string' */
string = argv[c];
if(iskey("ibs")) {
ibs = number(BIG);
continue;
}
if(iskey("obs")) {
obs = number(BIG);
continue;
}
if(iskey("cbs")) {
cbs = number(BIG);
continue;
}
if(iskey("bs")) {
bs = number(BIG);
continue;
}
if(iskey("if")) {
ifile = string;
continue;
}
if(iskey("of")) {
ofile = string;
continue;
}
if(iskey("trunc")) {
dotrunc = number(BIG);
continue;
}
if(iskey("skip")) {
skip = number(BIG);
continue;
}
if(iskey("seek") || iskey("oseek")) {
oseekn = number(BIG);
continue;
}
if(iskey("iseek")) {
iseekn = number(BIG);
continue;
}
if(iskey("count")) {
count = number(BIG);
continue;
}
if(iskey("files")) {
files = number(BIG);
continue;
}
if(iskey("conv")) {
/* comma-separated list; each match() consumes one word from `string' */
cloop:
if(match(","))
goto cloop;
if(*string == '\0')
continue;
if(match("ebcdic")) {
conv = ebcdic;
goto cloop;
}
if(match("ibm")) {
conv = ibm;
goto cloop;
}
if(match("ascii")) {
conv = ascii;
goto cloop;
}
if(match("block")) {
conv = block;
goto cloop;
}
if(match("unblock")) {
conv = unblock;
goto cloop;
}
if(match("lcase")) {
cflag |= LCASE;
goto cloop;
}
if(match("ucase")) {
cflag |= UCASE;
goto cloop;
}
if(match("swab")) {
cflag |= SWAB;
goto cloop;
}
if(match("noerror")) {
cflag |= NERR;
goto cloop;
}
if(match("sync")) {
cflag |= SYNC;
goto cloop;
}
/* an unknown conversion word falls out to the "bad arg" report below */
}
fprint(2, "dd: bad arg: %s\n", key);
exits("arg");
}
/* case folding without another conversion still needs the per-byte path */
if(conv == null && cflag&(LCASE|UCASE))
conv = cnull;
if(ifile)
ibf = open(ifile, 0);
else
ibf = dup(0, -1);
if(ibf < 0) {
fprint(2, "dd: open %s: %r\n", ifile);
exits("open");
}
if(ofile){
if(dotrunc)
obf = create(ofile, 1, 0664);
else
obf = open(ofile, 1);
if(obf < 0) {
fprint(2, "dd: create %s: %r\n", ofile);
exits("create");
}
}else{
obf = dup(1, -1);
if(obf < 0) {
/* NOTE(review): ofile is nil on this branch, yet it is passed to %s */
fprint(2, "dd: can't dup file descriptor: %s: %r\n", ofile);
exits("dup");
}
}
/* -bs overrides both block sizes */
if(bs)
ibs = obs = bs;
if(ibs == obs && conv == null)
fflag++;
if(ibs == 0 || obs == 0) {
fprint(2, "dd: counts: cannot be zero\n");
exits("counts");
}
ibuf = sbrk(ibs);
if(fflag)
obuf = ibuf;
else
obuf = sbrk(obs);
sbrk(64); /* For good measure */
if(ibuf == (char *)-1 || obuf == (char *)-1) {
fprint(2, "dd: not enough memory: %r\n");
exits("memory");
}
ibc = 0;
obc = 0;
cbc = 0;
op = obuf;
/*
if(signal(SIGINT, SIG_IGN) != SIG_IGN)
signal(SIGINT, term);
*/
/* relative seeks (whence 1); their return values are not checked */
seek(obf, obs*oseekn, 1);
seek(ibf, ibs*iseekn, 1);
/* -skip discards whole input blocks by reading them */
while(skip) {
read(ibf, ibuf, ibs);
skip--;
}
ip = 0;
loop:
/* input buffer exhausted: read the next block */
if(ibc-- == 0) {
ibc = 0;
/* count == 0 means no limit on the number of input blocks */
if(count==0 || nifr+nipr!=count) {
/* pre-zero the buffer so a short or failed read leaves zero padding */
if(cflag&(NERR|SYNC))
for(ip=ibuf+ibs; ip>ibuf;)
*--ip = 0;
ibc = read(ibf, ibuf, ibs);
}
if(ibc == -1) {
perror("read");
if((cflag&NERR) == 0) {
flsh();
term();
}
/* noerror: keep bytes up to the last non-zero one found in the buffer */
ibc = 0;
for(c=0; c<ibs; c++)
if(ibuf[c] != 0)
ibc = c;
stats();
}
/* end of input; -files lets the copy continue across several EOFs */
if(ibc == 0 && --files<=0) {
flsh();
term();
}
if(ibc != ibs) {
/* partial input record; sync pads it to a full block */
nipr++;
if(cflag&SYNC)
ibc = ibs;
} else
nifr++;
ip = ibuf;
/* NOTE(review): "& ~1" makes the pair count even - confirm this is intended */
c = (ibc>>1) & ~1;
if(cflag&SWAB && c)
do {
a = *ip++;
ip[-1] = *ip;
*ip++ = a;
} while(--c);
ip = ibuf;
if(fflag) {
/* fast path: obuf is ibuf, write the block as read */
obc = ibc;
flsh();
ibc = 0;
}
goto loop;
}
/* per-byte path: hand the next input byte to the conversion routine */
c = 0;
c |= *ip++;
c &= 0377;
(*conv)(c);
goto loop;
}
/*
 * Write the obc pending bytes of obuf to the output file and account
 * for the record: a full record when obc equals obs, a partial one
 * otherwise.  A short or failed write reports the error and ends the
 * program through term().
 */
void
flsh(void)
{
	int nw;

	if(obc == 0)
		return;
	nw = write(obf, obuf, obc);
	if(nw != obc){
		/* something was written, so count it as a partial record */
		if(nw > 0)
			++nopr;
		perror("write");
		term();
	}
	if(obc != obs)
		nopr++;
	else
		nofr++;
	obc = 0;
}
/*
 * If the global `string' begins with s, advance `string' past that
 * prefix and return 1; otherwise leave it untouched and return 0.
 */
int
match(char *s)
{
	char *cs;

	for(cs = string; *s != '\0'; cs++, s++)
		if(*cs != *s)
			return 0;
	string = cs;
	return 1;
}
/*
 * Parse the option value held in the global `string': decimal digits
 * followed by any number of multiplier suffixes.
 *   k   multiply by 1024
 *   b   multiply by 512
 *   x   multiply by the number that follows (parsed recursively)
 * The result must satisfy 0 <= n < big, otherwise the program exits
 * with status "range".  Any other trailing character is rejected with
 * status "arg"; previously such characters were silently skipped, so
 * a value like "64K" was accepted as 64.
 */
vlong
number(long big)
{
	char *cs;
	vlong n;

	cs = string;
	n = 0;
	while(*cs >= '0' && *cs <= '9')
		n = n*10 + *cs++ - '0';
	for(;;)
		switch(*cs++) {
		case 'k':
			n *= 1024;
			continue;
		case 'b':
			n *= 512;
			continue;
		case 'x':
			/* the recursive call consumes the rest of the string */
			string = cs;
			n *= number(BIG);
			/* fall through */
		case '\0':
			if(n>=big || n<0) {
				fprint(2, "dd: argument %lld out of range\n", n);
				exits("range");
			}
			return n;
		default:
			fprint(2, "dd: bad number: %s\n", string);
			exits("arg");
		}
	/* never gets here */
}
/*
 * Pass-through conversion with optional case folding: apply ucase,
 * then lcase, as selected in cflag, and hand the byte to null().
 */
void
cnull(int cc)
{
	int ch;

	ch = cc;
	if((cflag&UCASE) && ch>='a' && ch<='z')
		ch = ch - 'a' + 'A';
	if((cflag&LCASE) && ch>='A' && ch<='Z')
		ch = ch - 'A' + 'a';
	null(ch);
}
/*
 * Append one byte to the output buffer; once obs bytes are pending,
 * flush them and restart at the beginning of obuf.
 */
void
null(int c)
{
	*op++ = c;
	obc++;
	if(obc < obs)
		return;
	flsh();
	op = obuf;
}
/*
 * Translate one input byte through the etoa table.  With cbs set,
 * input is taken as fixed-length records of cbs bytes: runs of blanks
 * are held back in nspace and only emitted when a non-blank follows,
 * so trailing blanks are dropped, and a newline ends each record.
 */
void
ascii(int cc)
{
	int ch;

	ch = etoa[cc];
	if(cbs == 0){
		cnull(ch);
		return;
	}
	if(ch != ' '){
		/* a non-blank proves the held-back blanks were not trailing */
		for(; nspace > 0; nspace--)
			null(' ');
		cnull(ch);
	}else
		nspace++;
	cbc++;
	if(cbc >= cbs){
		null('\n');
		cbc = 0;
		nspace = 0;
	}
}
/*
 * Turn fixed-length records of cbs bytes into newline-terminated
 * lines without character-set translation: blanks are held back in
 * nspace and dropped when they end a record.  With cbs == 0 the byte
 * is simply passed to cnull().
 */
void
unblock(int cc)
{
	int ch;

	ch = cc & 0377;
	if(cbs == 0){
		cnull(ch);
		return;
	}
	if(ch != ' '){
		/* emit the blanks that turned out not to be trailing */
		for(; nspace > 0; nspace--)
			null(' ');
		cnull(ch);
	}else
		nspace++;
	cbc++;
	if(cbc >= cbs){
		null('\n');
		cbc = 0;
		nspace = 0;
	}
}
/*
 * Translate one byte through the atoe table after optional case
 * folding.  With cbs set, newline-terminated lines become fixed-length
 * records: a newline pads the record to cbs bytes with the translated
 * blank, and bytes beyond cbs are dropped (ntrunc counts each line
 * that overflows once).
 */
void
ebcdic(int cc)
{
	int ch;

	ch = cc;
	if((cflag&UCASE) && ch>='a' && ch<='z')
		ch = ch - 'a' + 'A';
	if((cflag&LCASE) && ch>='A' && ch<='Z')
		ch = ch - 'A' + 'a';
	ch = atoe[ch];
	if(cbs == 0){
		null(ch);
		return;
	}
	/* the newline test uses the untranslated input byte */
	if(cc == '\n'){
		for(; cbc < cbs; cbc++)
			null(atoe[' ']);
		cbc = 0;
		return;
	}
	if(cbc == cbs)
		ntrunc++;
	if(++cbc <= cbs)
		null(ch);
}
/*
 * Same record handling as the ebcdic conversion, but translating
 * through the atoibm table: optional case folding, padding of each
 * line to cbs bytes with the translated blank, and truncation of
 * lines longer than cbs (counted in ntrunc).
 */
void
ibm(int cc)
{
	int ch;

	ch = cc;
	if((cflag&UCASE) && ch>='a' && ch<='z')
		ch = ch - 'a' + 'A';
	if((cflag&LCASE) && ch>='A' && ch<='Z')
		ch = ch - 'A' + 'a';
	ch = atoibm[ch] & 0377;
	if(cbs == 0){
		null(ch);
		return;
	}
	/* the newline test uses the untranslated input byte */
	if(cc == '\n'){
		for(; cbc < cbs; cbc++)
			null(atoibm[' ']);
		cbc = 0;
		return;
	}
	if(cbc == cbs)
		ntrunc++;
	if(++cbc <= cbs)
		null(ch);
}
/*
 * Convert newline-terminated lines into fixed-length records of cbs
 * bytes without character-set translation: short lines are padded
 * with blanks, long lines are truncated (counted once each in ntrunc).
 * Optional case folding is applied first.
 */
void
block(int cc)
{
	int ch;

	ch = cc;
	if((cflag&UCASE) && ch>='a' && ch<='z')
		ch = ch - 'a' + 'A';
	if((cflag&LCASE) && ch>='A' && ch<='Z')
		ch = ch - 'A' + 'a';
	ch &= 0377;
	if(cbs == 0){
		null(ch);
		return;
	}
	if(cc == '\n'){
		for(; cbc < cbs; cbc++)
			null(' ');
		cbc = 0;
		return;
	}
	if(cbc == cbs)
		ntrunc++;
	if(++cbc <= cbs)
		null(ch);
}
/*
 * Print the record counts and exit with an empty status.
 * Callers flush pending output with flsh() first when that is wanted;
 * term() itself writes no data.
 */
void
term(void)
{
stats();
exits(0);
}
/*
 * Report on file descriptor 2 the number of full+partial records read
 * and written, plus the count of truncated records when there were any.
 */
void
stats(void)
{
fprint(2, "%lud+%lud records in\n", nifr, nipr);
fprint(2, "%lud+%lud records out\n", nofr, nopr);
if(ntrunc)
fprint(2, "%lud truncated records\n", ntrunc);
}
uchar etoa[] =
{
0000,0001,0002,0003,0234,0011,0206,0177,
0227,0215,0216,0013,0014,0015,0016,0017,
0020,0021,0022,0023,0235,0205,0010,0207,
0030,0031,0222,0217,0034,0035,0036,0037,
0200,0201,0202,0203,0204,0012,0027,0033,
0210,0211,0212,0213,0214,0005,0006,0007,
0220,0221,0026,0223,0224,0225,0226,0004,
0230,0231,0232,0233,0024,0025,0236,0032,
0040,0240,0241,0242,0243,0244,0245,0246,
0247,0250,0133,0056,0074,0050,0053,0041,
0046,0251,0252,0253,0254,0255,0256,0257,
0260,0261,0135,0044,0052,0051,0073,0136,
0055,0057,0262,0263,0264,0265,0266,0267,
0270,0271,0174,0054,0045,0137,0076,0077,
0272,0273,0274,0275,0276,0277,0300,0301,
0302,0140,0072,0043,0100,0047,0075,0042,
0303,0141,0142,0143,0144,0145,0146,0147,
0150,0151,0304,0305,0306,0307,0310,0311,
0312,0152,0153,0154,0155,0156,0157,0160,
0161,0162,0313,0314,0315,0316,0317,0320,
0321,0176,0163,0164,0165,0166,0167,0170,
0171,0172,0322,0323,0324,0325,0326,0327,
0330,0331,0332,0333,0334,0335,0336,0337,
0340,0341,0342,0343,0344,0345,0346,0347,
0173,0101,0102,0103,0104,0105,0106,0107,
0110,0111,0350,0351,0352,0353,0354,0355,
0175,0112,0113,0114,0115,0116,0117,0120,
0121,0122,0356,0357,0360,0361,0362,0363,
0134,0237,0123,0124,0125,0126,0127,0130,
0131,0132,0364,0365,0366,0367,0370,0371,
0060,0061,0062,0063,0064,0065,0066,0067,
0070,0071,0372,0373,0374,0375,0376,0377,
};
uchar atoe[] =
{
0000,0001,0002,0003,0067,0055,0056,0057,
0026,0005,0045,0013,0014,0015,0016,0017,
0020,0021,0022,0023,0074,0075,0062,0046,
0030,0031,0077,0047,0034,0035,0036,0037,
0100,0117,0177,0173,0133,0154,0120,0175,
0115,0135,0134,0116,0153,0140,0113,0141,
0360,0361,0362,0363,0364,0365,0366,0367,
0370,0371,0172,0136,0114,0176,0156,0157,
0174,0301,0302,0303,0304,0305,0306,0307,
0310,0311,0321,0322,0323,0324,0325,0326,
0327,0330,0331,0342,0343,0344,0345,0346,
0347,0350,0351,0112,0340,0132,0137,0155,
0171,0201,0202,0203,0204,0205,0206,0207,
0210,0211,0221,0222,0223,0224,0225,0226,
0227,0230,0231,0242,0243,0244,0245,0246,
0247,0250,0251,0300,0152,0320,0241,0007,
0040,0041,0042,0043,0044,0025,0006,0027,
0050,0051,0052,0053,0054,0011,0012,0033,
0060,0061,0032,0063,0064,0065,0066,0010,
0070,0071,0072,0073,0004,0024,0076,0341,
0101,0102,0103,0104,0105,0106,0107,0110,
0111,0121,0122,0123,0124,0125,0126,0127,
0130,0131,0142,0143,0144,0145,0146,0147,
0150,0151,0160,0161,0162,0163,0164,0165,
0166,0167,0170,0200,0212,0213,0214,0215,
0216,0217,0220,0232,0233,0234,0235,0236,
0237,0240,0252,0253,0254,0255,0256,0257,
0260,0261,0262,0263,0264,0265,0266,0267,
0270,0271,0272,0273,0274,0275,0276,0277,
0312,0313,0314,0315,0316,0317,0332,0333,
0334,0335,0336,0337,0352,0353,0354,0355,
0356,0357,0372,0373,0374,0375,0376,0377,
};
uchar atoibm[] =
{
0000,0001,0002,0003,0067,0055,0056,0057,
0026,0005,0045,0013,0014,0015,0016,0017,
0020,0021,0022,0023,0074,0075,0062,0046,
0030,0031,0077,0047,0034,0035,0036,0037,
0100,0132,0177,0173,0133,0154,0120,0175,
0115,0135,0134,0116,0153,0140,0113,0141,
0360,0361,0362,0363,0364,0365,0366,0367,
0370,0371,0172,0136,0114,0176,0156,0157,
0174,0301,0302,0303,0304,0305,0306,0307,
0310,0311,0321,0322,0323,0324,0325,0326,
0327,0330,0331,0342,0343,0344,0345,0346,
0347,0350,0351,0255,0340,0275,0137,0155,
0171,0201,0202,0203,0204,0205,0206,0207,
0210,0211,0221,0222,0223,0224,0225,0226,
0227,0230,0231,0242,0243,0244,0245,0246,
0247,0250,0251,0300,0117,0320,0241,0007,
0040,0041,0042,0043,0044,0025,0006,0027,
0050,0051,0052,0053,0054,0011,0012,0033,
0060,0061,0032,0063,0064,0065,0066,0010,
0070,0071,0072,0073,0004,0024,0076,0341,
0101,0102,0103,0104,0105,0106,0107,0110,
0111,0121,0122,0123,0124,0125,0126,0127,
0130,0131,0142,0143,0144,0145,0146,0147,
0150,0151,0160,0161,0162,0163,0164,0165,
0166,0167,0170,0200,0212,0213,0214,0215,
0216,0217,0220,0232,0233,0234,0235,0236,
0237,0240,0252,0253,0254,0255,0256,0257,
0260,0261,0262,0263,0264,0265,0266,0267,
0270,0271,0272,0273,0274,0275,0276,0277,
0312,0313,0314,0315,0316,0317,0332,0333,
0334,0335,0336,0337,0352,0353,0354,0355,
0356,0357,0372,0373,0374,0375,0376,0377,
};

35
diff/Makefile Normal file
View File

@@ -0,0 +1,35 @@
# diff - diff unix port from plan9
# Depends on ../lib9
TARG = diff
OFILES = diffdir.o diffio.o diffreg.o main.o
MANFILES = diff.1
include ../config.mk
all: ${TARG}
@strip ${TARG}
@echo built ${TARG}
install: ${TARG}
@mkdir -p ${DESTDIR}${PREFIX}/bin
@cp -f ${TARG} ${DESTDIR}${PREFIX}/bin/
@chmod 755 ${DESTDIR}${PREFIX}/bin/${TARG}
@mkdir -p ${DESTDIR}${MANPREFIX}/man1
@cp -f ${MANFILES} ${DESTDIR}${MANPREFIX}/man1
@chmod 444 ${DESTDIR}${MANPREFIX}/man1/${MANFILES}
# Remove exactly what the install target put in place.  The man page is
# installed under ${MANPREFIX}/man1, so it must be removed from there
# (it was being removed from ${PREFIX}/man1, leaving the real file behind).
uninstall:
	rm -f ${DESTDIR}${PREFIX}/bin/${TARG}
	rm -f ${DESTDIR}${MANPREFIX}/man1/${MANFILES}
.c.o:
@echo CC $*.c
@${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include -I../lib9 $*.c
clean:
rm -f ${OFILES} ${TARG}
${TARG}: ${OFILES}
@echo LD ${TARG}
@${CC} ${LDFLAGS} -o ${TARG} ${OFILES} -lm -L${PREFIX}/lib -L../lib9 -l9

163
diff/diff.1 Normal file
View File

@@ -0,0 +1,163 @@
.TH DIFF 1
.SH NAME
diff \- differential file comparator
.SH SYNOPSIS
.B diff
[
.B -acefmnbwr
] file1 ... file2
.SH DESCRIPTION
.I Diff
tells what lines must be changed in two files to bring them
into agreement.
If one file
is a directory,
then a file in that directory with basename the same as that of
the other file is used.
If both files are directories, similarly named files in the
two directories are compared by the method of
.I diff
for text
files and
.IR cmp (1)
otherwise.
If more than two file names are given, then each argument is compared
to the last argument as above.
The
.B -r
option causes
.I diff
to process similarly named subdirectories recursively.
When processing more than one file,
.I diff
prefixes file differences with a single line
listing the two differing files, in the form of
a
.I diff
command line.
The
.B -m
flag causes this behavior even when processing single files.
.PP
The normal output contains lines of these forms:
.IP "" 5
.I n1
.B a
.I n3,n4
.br
.I n1,n2
.B d
.I n3
.br
.I n1,n2
.B c
.I n3,n4
.PP
These lines resemble
.I ed
commands to convert
.I file1
into
.IR file2 .
The numbers after the letters pertain to
.IR file2 .
In fact, by exchanging `a' for `d' and reading backward
one may ascertain equally how to convert
.I file2
into
.IR file1 .
As in
.IR ed ,
identical pairs where
.I n1
=
.I n2
or
.I n3
=
.I n4
are abbreviated as a single number.
.PP
Following each of these lines come all the lines that are
affected in the first file flagged by `<',
then all the lines that are affected in the second file
flagged by `>'.
.PP
The
.B -b
option causes
trailing blanks (spaces and tabs) to be ignored
and other strings of blanks to compare equal.
The
.B -w
option causes all white-space to be removed from input lines
before applying the difference algorithm.
.PP
The
.B -n
option prefixes each range with
.IB file : \fR
and inserts a space around the
.BR a ,
.BR c ,
and
.B d
verbs.
The
.B -e
option produces a script of
.I "a, c"
and
.I d
commands for the editor
.IR ed ,
which will recreate
.I file2
from
.IR file1 .
The
.B -f
option produces a similar script,
not useful with
.IR ed ,
in the opposite order. It may, however, be
useful as input to a stream-oriented post-processor.
.PP
The
.B -c
option includes three lines of context around each
change, merging changes whose contexts overlap.
The
.B -a
flag displays the entire file as context.
.PP
Except in rare circumstances,
.I diff
finds a smallest sufficient set of file
differences.
.SH FILES
.B /tmp/diff[12]
.SH SOURCE
.B \*9/src/cmd/diff
.SH "SEE ALSO"
.IR cmp (1),
.IR comm (1),
.IR ed (1)
.SH DIAGNOSTICS
Exit status is the empty string
for no differences,
.L some
for some,
and
.L error
for trouble.
.SH BUGS
Editing scripts produced under the
.BR -e " or"
.BR -f " option are naive about"
creating lines consisting of a single `\fB.\fR'.
.PP
When running
.I diff
on directories, the notion of what is a text
file is open to debate.

27
diff/diff.h Normal file
View File

@@ -0,0 +1,27 @@
#define stdout bstdout
/*
 * Output format.  change() in diffio.c handles '\0' (normal), 'e',
 * 'f', 'n', 'c' and 'a'; the original comment also listed 'h' -
 * TODO confirm whether anything still sets it.
 */
char mode; /* '\0', 'e', 'f', 'h' */
/* 0: compare blanks exactly; 1: coalesce runs of blanks and drop trailing ones; other values: strip all white space (see readhash) */
char bflag; /* ignore multiple and trailing blanks */
char rflag; /* recurse down directory trees */
char mflag; /* pseudo flag: doing multiple files, one dir */
/* set to 1 by change() as soon as one difference is reported */
int anychange;
extern Biobuf stdout;
extern int binary;
#define MALLOC(t, n) ((t *)emalloc((n)*sizeof(t)))
#define REALLOC(p, t, n) ((t *)erealloc((void *)(p), (n)*sizeof(t)))
#define FREE(p) free((void *)(p))
#define MAXPATHLEN 1024
int mkpathname(char *, char *, char *);
void *emalloc(unsigned);
void *erealloc(void *, unsigned);
void diff(char *, char *, int);
void diffdir(char *, char *, int);
void diffreg(char *, char *);
Biobuf *prepare(int, char *);
void panic(int, char *, ...);
void check(Biobuf *, Biobuf *);
void change(int, int, int, int);
void flushchanges(void);

113
diff/diffdir.c Normal file
View File

@@ -0,0 +1,113 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
#include "diff.h"
/*
 * qsort comparison function for an array of C strings: each argument
 * points at a char* element, and the strings are ordered with strcmp.
 */
static int
itemcmp(const void *v1, const void *v2)
{
	const char *left, *right;

	left = *(char *const *)v1;
	right = *(char *const *)v2;
	return strcmp(left, right);
}
/*
 * Read directory `name' and return a nil-terminated, strcmp-sorted
 * array of freshly allocated copies of its entry names.  The caller
 * frees each name and the array.  If the directory cannot be opened,
 * panic() is called (with status 0 under mflag, else 2) and nil is
 * returned.
 */
static char **
scandir(char *name)
{
	char **names;
	Dir *ents;
	int count, fd, i, nents;

	fd = open(name, OREAD);
	if(fd < 0){
		panic(mflag ? 0 : 2, "can't open %s\n", name);
		return nil;
	}
	names = 0;
	count = 0;
	nents = dirreadall(fd, &ents);
	if(nents > 0){
		/* entries are copied last to first; qsort fixes the order below */
		for(i = nents-1; i >= 0; i--){
			names = REALLOC(names, char *, (count+1));
			names[count] = MALLOC(char, strlen(ents[i].name)+1);
			strcpy(names[count], ents[i].name);
			count++;
		}
		free(ents);
	}
	/* room for the terminating nil, also when the directory is empty */
	names = REALLOC(names, char*, (count+1));
	names[count] = 0;
	close(fd);
	qsort((char *)names, count, sizeof(char*), itemcmp);
	return names;
}
/* Return 1 if p is exactly "." or "..", 0 for any other name. */
static int
isdotordotdot(char *p)
{
	if(p[0] != '.')
		return 0;
	if(p[1] == '\0')
		return 1;
	return p[1] == '.' && p[2] == '\0';
}
/*
 * Compare directories f and t.  Both listings are sorted, so they are
 * walked in parallel like a merge: a name present on one side only is
 * reported as "Only in ..." (normal and -n output modes), and a name
 * present on both sides is handed to diff() one level deeper.
 * "." and ".." are ignored.  If either directory cannot be read,
 * nothing is compared.
 */
void
diffdir(char *f, char *t, int level)
{
	char **df, **dt, **dirf, **dirt;
	char *from, *to;
	int res;
	char fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];

	df = scandir(f);
	dt = scandir(t);
	/* keep the array starts: df and dt are advanced during the walk */
	dirf = df;
	dirt = dt;
	if(df == nil || dt == nil)
		goto Out;
	while (*df || *dt) {
		from = *df;
		to = *dt;
		if (from && isdotordotdot(from)) {
			df++;
			continue;
		}
		if (to && isdotordotdot(to)) {
			dt++;
			continue;
		}
		/* an exhausted side sorts after everything on the other side */
		if (!from)
			res = 1;
		else if (!to)
			res = -1;
		else
			res = strcmp(from, to);
		if (res < 0) {
			if (mode == 0 || mode == 'n')
				Bprint(&stdout, "Only in %s: %s\n", f, from);
			df++;
			continue;
		}
		if (res > 0) {
			if (mode == 0 || mode == 'n')
				Bprint(&stdout, "Only in %s: %s\n", t, to);
			dt++;
			continue;
		}
		/*
		 * Same name on both sides.  If a path cannot be built, skip
		 * the pair; continuing without advancing df and dt would
		 * retry the same entries forever.
		 */
		if (mkpathname(fb, f, from) || mkpathname(tb, t, to)) {
			df++; dt++;
			continue;
		}
		diff(fb, tb, level+1);
		df++; dt++;
	}
Out:
	for (df = dirf; df && *df; df++)
		FREE(*df);
	for (dt = dirt; dt && *dt; dt++)
		FREE(*dt);
	FREE(dirf);
	FREE(dirt);
}

387
diff/diffio.c Normal file
View File

@@ -0,0 +1,387 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#include "diff.h"
struct line {
int serial;
int value;
};
extern struct line *file[2];
extern int len[2];
extern long *ixold, *ixnew;
extern int *J;
static Biobuf *input[2];
static char *file1, *file2;
static int firstchange;
#define MAXLINELEN 4096
#define MIN(x, y) ((x) < (y) ? (x): (y))
/*
 * Read one line from bp into buf (capacity MAXLINELEN) without the
 * newline and NUL-terminate it.  At most MAXLINELEN-1 bytes are
 * stored; the remainder of a longer line is read and discarded.
 * Returns the number of bytes stored, or -1 at end of input when
 * nothing was read.
 */
static int
readline(Biobuf *bp, char *buf)
{
	int c, n;

	n = 0;
	for(;;){
		c = Bgetc(bp);
		if(c < 0){
			if(n == 0)
				return -1;
			break;
		}
		if(c == '\n')
			break;
		buf[n++] = c;
		if(n >= MAXLINELEN-1)
			break;
	}
	buf[n] = 0;
	/* buffer full before the newline: skip the rest of the line */
	if(c >= 0 && c != '\n')
		while((c = Bgetc(bp)) >= 0 && c != '\n')
			;
	return n;
}
#define HALFLONG 16
#define low(x) (x&((1L<<HALFLONG)-1))
#define high(x) (x>>HALFLONG)
/*
 * hashing has the effect of
 * arranging line in 7-bit bytes and then
 * summing 1-s complement in 16-bit hunks
 *
 * Reads the next line from bp into buf and returns its hash, or 0 when
 * readline reports end of input.  bflag selects how white space enters
 * the hash: 0 hashes every byte, 1 folds each run of white space into
 * a single shift step (and ignores a trailing run), any other value
 * skips white space entirely.
 * NOTE(review): callers treat 0 as end of file; it is not obvious from
 * here that a real line can never hash to 0 - confirm.
 */
static int
readhash(Biobuf *bp, char *buf)
{
long sum;
unsigned shift;
char *p;
int len, space;
sum = 1;
shift = 0;
if ((len = readline(bp, buf)) == -1)
return 0;
p = buf;
switch(bflag) /* various types of white space handling */
{
case 0:
/* the shift is masked to 0..HALFLONG-1 before use, then advanced by 7 */
while (len--) {
sum += (long)*p++ << (shift &= (HALFLONG-1));
shift += 7;
}
break;
case 1:
/*
 * coalesce multiple white-space
 */
for (space = 0; len--; p++) {
if (isspace((uchar)*p)) {
space++;
continue;
}
/* a run of blanks only advances the shift, as one blank would */
if (space) {
shift += 7;
space = 0;
}
sum += (long)*p << (shift &= (HALFLONG-1));
shift += 7;
}
break;
default:
/*
 * strip all white-space
 */
while (len--) {
if (isspace((uchar)*p)) {
p++;
continue;
}
sum += (long)*p++ << (shift &= (HALFLONG-1));
shift += 7;
}
break;
}
/* fold the upper half into the lower half, twice */
sum = low(sum) + high(sum);
return ((short)low(sum) + (short)high(sum));
}
/*
 * Open file `arg' as input number i (0 = first file, anything else =
 * second) and hash its lines.
 *
 * On success the Biobuf is returned and len[i], file[i] and input[i]
 * describe the file; line hashes are stored in p[1..len[i]].value.
 * If the file cannot be opened, panic() is called (status 0 under
 * mflag, else 2) and 0 is returned.  If the global `binary' is already
 * set, or the first bytes look binary, the Biobuf is returned without
 * hashing and `binary' is non-zero.
 */
Biobuf *
prepare(int i, char *arg)
{
struct line *p;
int j, h;
Biobuf *bp;
char *cp, buf[MAXLINELEN];
int nbytes;
Rune r;
bp = Bopen(arg, OREAD);
if (!bp) {
panic(mflag ? 0: 2, "cannot open %s: %r\n", arg);
return 0;
}
if (binary)
return bp;
/* sniff at most the first 1024 bytes */
nbytes = Bread(bp, buf, MIN(1024, MAXLINELEN));
if (nbytes > 0) {
cp = buf;
/* stop UTFmax bytes short of the end so chartorune stays inside the data read */
while (cp < buf+nbytes-UTFmax) {
/*
 * heuristic for a binary file in the
 * brave new UNICODE world
 */
cp += chartorune(&r, cp);
if (r == 0 || (r > 0x7f && r <= 0xa0)) {
binary++;
return bp;
}
}
/* rewind so hashing starts at the first line */
Bseek(bp, 0, 0);
}
p = MALLOC(struct line, 3);
/* j is pre-incremented in the body, so hashes are stored from p[1] on; a hash of 0 ends the loop */
for (j = 0; h = readhash(bp, buf); p[j].value = h)
p = REALLOC(p, struct line, (++j+3));
len[i] = j;
file[i] = p;
input[i] = bp; /*fix*/
if (i == 0) { /*fix*/
file1 = arg;
/* new file pair: the "diff ..." header has not been printed yet */
firstchange = 0;
}
else
file2 = arg;
return bp;
}
/*
 * Normalise white space in buf in place and return the new length.
 * White-space characters are never copied; when bflag is 1 a single
 * blank is written in place of each run that is followed by more
 * text, so trailing white space disappears.  For other bflag values
 * all white space is removed.
 */
static int
squishspace(char *buf)
{
	char *src, *dst;
	int pending;

	pending = 0;
	dst = buf;
	src = buf;
	while(*src != '\0'){
		if(isspace((uchar)*src))
			pending++;
		else{
			if(pending && bflag == 1){
				*dst++ = ' ';
				pending = 0;
			}
			*dst++ = *src;
		}
		src++;
	}
	*dst = 0;
	return dst - buf;
}
/*
 * need to fix up for unexpected EOF's
 *
 * Re-read both files line by line to (a) record cumulative byte
 * offsets in ixold[] and ixnew[] (line length plus one for the
 * newline) and (b) verify every candidate match: when J[f] is
 * non-zero the text of old line f is compared with new line J[f],
 * and J[f] is cleared if the lines differ (a hash collision).
 * J[f] is presumably the new-file line paired with old line f, with
 * 0 meaning "no match" - it is filled in elsewhere; verify.
 * NOTE(review): readline truncates lines longer than MAXLINELEN-1,
 * so the offsets computed from its return value look wrong for such
 * lines - confirm.
 */
void
check(Biobuf *bf, Biobuf *bt)
{
int f, t, flen, tlen;
char fbuf[MAXLINELEN], tbuf[MAXLINELEN];
ixold[0] = ixnew[0] = 0;
for (f = t = 1; f < len[0]; f++) {
flen = readline(bf, fbuf);
ixold[f] = ixold[f-1] + flen + 1; /* ftell(bf) */
if (J[f] == 0)
continue;
/* advance the new file up to and including line J[f] */
do {
tlen = readline(bt, tbuf);
ixnew[t] = ixnew[t-1] + tlen + 1; /* ftell(bt) */
} while (t++ < J[f]);
/* with -b/-w compare the lines after white-space normalisation */
if (bflag) {
flen = squishspace(fbuf);
tlen = squishspace(tbuf);
}
if (flen != tlen || strcmp(fbuf, tbuf))
J[f] = 0;
}
/* record offsets for the remaining lines of the new file */
while (t < len[1]) {
tlen = readline(bt, tbuf);
ixnew[t] = ixnew[t-1] + tlen + 1; /* fseek(bt) */
t++;
}
}
/*
 * range: print the line range a..b on stdout.  The smaller of the two
 * numbers is always printed; separator and b follow only when a < b.
 */
static void
range(int a, int b, char *separator)
{
	int first;

	first = a;
	if (a > b)
		first = b;
	Bprint(&stdout, "%d", first);
	if (b > a)
		Bprint(&stdout, "%s%d", separator, b);
}
/*
 * fetch: print lines a..b of the file behind bp, each prefixed with s.
 * f is the table of line offsets for that file (ixold or ixnew).  The
 * range is clamped to 1..len of the file; nothing is printed when a lies
 * beyond the last line.
 */
static void
fetch(long *f, int a, int b, Biobuf *bp, char *s)
{
	char text[MAXLINELEN];
	int last, n;

	if(a < 1)
		a = 1;
	last = (bp == input[0]) ? len[0] : len[1];
	if(b > last)
		b = last;
	if(a > last)
		return;
	Bseek(bp, f[a-1], 0);
	for(n = a; n <= b; n++){
		readline(bp, text);
		Bprint(&stdout, "%s%s\n", s, text);
	}
}
/*
 * One recorded difference, holding the four arguments given to change():
 * lines a..b of the first file against lines c..d of the second.
 */
typedef struct Change Change;
struct Change
{
int a;
int b;
int c;
int d;
};
/* differences queued by change() in modes 'c' and 'a'; emptied by flushchanges() */
Change *changes;
int nchanges;
/*
 * change: report that lines a..b of the first file differ from lines c..d
 * of the second.  An empty range is expressed as a > b (or c > d); if both
 * are empty nothing is reported.  The output format depends on the global
 * `mode`; in modes 'c' and 'a' the change is only queued in `changes` for
 * flushchanges() to print later.
 */
void
change(int a, int b, int c, int d)
{
char verb;
char buf[4];
Change *ch;
if (a > b && c > d)
return;
anychange = 1;
/* with mflag, print a "diff [-mode] file1 file2" header before the first change of this pair */
if (mflag && firstchange == 0) {
if(mode) {
buf[0] = '-';
buf[1] = mode;
buf[2] = ' ';
buf[3] = '\0';
} else {
buf[0] = '\0';
}
Bprint(&stdout, "diff %s%s %s\n", buf, file1, file2);
firstchange = 1;
}
/* 'a': nothing on the old side, 'd': nothing on the new side, else 'c' */
verb = a > b ? 'a': c > d ? 'd': 'c';
switch(mode) {
case 'e':
range(a, b, ",");
Bputc(&stdout, verb);
break;
case 0:
range(a, b, ",");
Bputc(&stdout, verb);
range(c, d, ",");
break;
case 'n':
Bprint(&stdout, "%s:", file1);
range(a, b, ",");
Bprint(&stdout, " %c ", verb);
Bprint(&stdout, "%s:", file2);
range(c, d, ",");
break;
case 'f':
Bputc(&stdout, verb);
range(a, b, " ");
break;
case 'c':
case 'a':
/* grow the queue in steps of 1024 entries */
if(nchanges%1024 == 0)
changes = erealloc(changes, (nchanges+1024)*sizeof(changes[0]));
ch = &changes[nchanges++];
ch->a = a;
ch->b = b;
ch->c = c;
ch->d = d;
return;
}
Bputc(&stdout, '\n');
/* default and 'n' modes show the old lines, then "---" when both sides are non-empty */
if (mode == 0 || mode == 'n') {
fetch(ixold, a, b, input[0], "< ");
if (a <= b && c <= d)
Bprint(&stdout, "---\n");
}
fetch(ixnew, c, d, input[1], mode == 0 || mode == 'n' ? "> ": "");
/* the remaining modes ('e', 'f') end the inserted text with a lone "." */
if (mode != 0 && mode != 'n' && c <= d)
Bprint(&stdout, ".\n");
}
enum
{
	Lines = 3	/* number of lines of context shown */
};

/*
 * changeset: starting at queued change i, return the index one past the
 * last change that belongs to the same context group.  Consecutive
 * changes are grouped while the next one starts within 2*Lines lines of
 * the end of the current one.
 *
 * The loop only looks at changes[i+1] when that entry exists; comparing
 * against the slot after the last queued change would read uninitialised
 * (or, when nchanges is a multiple of 1024, unallocated) memory.
 */
int
changeset(int i)
{
	while(i+1<nchanges && changes[i].b+1+2*Lines > changes[i+1].a)
		i++;
	if(i<nchanges)
		return i+1;
	return nchanges;
}
/*
 * flushchanges: print every change queued by change() (modes 'c' and 'a')
 * and empty the queue.  Changes are printed in groups chosen by
 * changeset(); each group gets a "file1:range - file2:range" header,
 * unchanged lines prefixed with two blanks, old lines with "- " and new
 * lines with "+ ".
 */
void
flushchanges(void)
{
int a, b, c, d, at;
int i, j;
if(nchanges == 0)
return;
for(i=0; i<nchanges; ){
j = changeset(i);
/* widen the group by Lines lines of context on each side, clamped to the files */
a = changes[i].a-Lines;
b = changes[j-1].b+Lines;
c = changes[i].c-Lines;
d = changes[j-1].d+Lines;
if(a < 1)
a = 1;
if(c < 1)
c = 1;
if(b > len[0])
b = len[0];
if(d > len[1])
d = len[1];
/* mode 'a': a single group covering both files entirely */
if(mode == 'a'){
a = 1;
b = len[0];
c = 1;
d = len[1];
j = nchanges;
}
Bprint(&stdout, "%s:", file1);
range(a, b, ",");
Bprint(&stdout, " - ");
Bprint(&stdout, "%s:", file2);
range(c, d, ",");
Bputc(&stdout, '\n');
/* at: next line of the first file that has not been printed yet */
at = a;
for(; i<j; i++){
fetch(ixold, at, changes[i].a-1, input[0], " ");
fetch(ixold, changes[i].a, changes[i].b, input[0], "- ");
fetch(ixnew, changes[i].c, changes[i].d, input[1], "+ ");
at = changes[i].b+1;
}
/* trailing context after the last change of the group */
fetch(ixold, at, b, input[0], " ");
}
nchanges = 0;
}

420
diff/diffreg.c Normal file
View File

@@ -0,0 +1,420 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
#include "diff.h"
/* diff - differential file comparison
*
* Uses an algorithm due to Harold Stone, which finds
* a pair of longest identical subsequences in the two
* files.
*
* The major goal is to generate the match vector J.
* J[i] is the index of the line in file1 corresponding
* to line i file0. J[i] = 0 if there is no
* such line in file1.
*
* Lines are hashed so as to work in core. All potential
* matches are located by sorting the lines of each file
* on the hash (called value). In particular, this
* collects the equivalence classes in file1 together.
* Subroutine equiv replaces the value of each line in
* file0 by the index of the first element of its
* matching equivalence in (the reordered) file1.
* To save space equiv squeezes file1 into a single
* array member in which the equivalence classes
* are simply concatenated, except that their first
* members are flagged by changing sign.
*
* Next the indices that point into member are unsorted into
* array class according to the original order of file0.
*
* The cleverness lies in routine stone. This marches
* through the lines of file0, developing a vector klist
* of "k-candidates". At step i a k-candidate is a matched
* pair of lines x,y (x in file0 y in file1) such that
* there is a common subsequence of length k
* between the first i lines of file0 and the first y
* lines of file1, but there is no such subsequence for
* any smaller y. x is the earliest possible mate to y
* that occurs in such a subsequence.
*
* Whenever any of the members of the equivalence class of
* lines in file1 matable to a line in file0 has serial number
* less than the y of some k-candidate, that k-candidate
* with the smallest such y is replaced. The new
* k-candidate is chained (via pred) to the current
* k-1 candidate so that the actual subsequence can
* be recovered. When a member has serial number greater
* than the y of all k-candidates, the klist is extended.
* At the end, the longest subsequence is pulled out
* and placed in the array J by unravel.
*
* With J in hand, the matches there recorded are
* check'ed against reality to assure that no spurious
* matches have crept in due to hashing. If they have,
* they are broken, and "jackpot " is recorded--a harmless
* matter except that a true match for a spuriously
* mated line may now be unnecessarily reported as a change.
*
* Much of the complexity of the program comes simply
* from trying to minimize core utilization and
* maximize the range of doable problems by dynamically
* allocating what is needed and reusing what is not.
* The core requirements for problems larger than somewhat
* are (in words) 2*length(file0) + length(file1) +
* 3*(number of k-candidates installed), typically about
* 6n words for files of length n.
*/
/* TIDY THIS UP */
/*
 * A k-candidate: line x of file0 matched with line y of file1.  pred is
 * the index in clist of the preceding candidate in the chain.
 */
struct cand {
int x;
int y;
int pred;
} cand;
/* One input line: serial is its line number, value its hash. */
struct line {
int serial;
int value;
} *file[2], line;
int len[2];
/* non-zero once either input has been judged to be a binary file */
int binary;
struct line *sfile[2]; /*shortened by pruning common prefix and suffix*/
int slen[2];
int pref, suff; /*length of prefix and suffix*/
int *class; /*will be overlaid on file[0]*/
int *member; /*will be overlaid on file[1]*/
int *klist; /*will be overlaid on file[0] after class*/
struct cand *clist; /* merely a free storage pot for candidates */
/* number of candidates currently stored in clist */
int clen;
int *J; /*will be overlaid on class*/
long *ixold; /*will be overlaid on klist*/
long *ixnew; /*will be overlaid on file[1]*/
/* END OF SOME TIDYING */
/*
 * sort: shellsort the 1-based array a[1..n] into increasing order of
 * (value, serial).  a[0] is not part of the sorted data.
 */
static void
sort(struct line *a, int n) /*shellsort CACM #201*/
{
int m;
struct line *ai, *aim, *j, *k;
struct line w;
int i;
m = 0;
/* initial gap: of the form 2^k - 1, derived from n */
for (i = 1; i <= n; i *= 2)
m = 2*i - 1;
for (m /= 2; m != 0; m /= 2) {
k = a+(n-m);
for (j = a+1; j <= k; j++) {
ai = j;
aim = ai+m;
do {
/* already ordered: larger value, or equal value with larger serial */
if (aim->value > ai->value ||
aim->value == ai->value &&
aim->serial > ai->serial)
break;
w = *ai;
*ai = *aim;
*aim = w;
aim = ai;
ai -= m;
} while (ai > a && aim >= ai);
}
}
}
/*
 * unsort: scatter the values of the sorted lines f[1..l] back into
 * original line order, so that b[serial] holds the value of the line with
 * that serial number.  The result is built in a scratch array first and
 * copied afterwards; diffreg passes a b that overlays the storage of f,
 * so writing b directly would clobber entries still to be read.
 */
static void
unsort(struct line *f, int l, int *b)
{
	int *scratch;
	int n;

	scratch = MALLOC(int, (l+1));
	n = 1;
	while(n <= l){
		scratch[f[n].serial] = f[n].value;
		n++;
	}
	n = 1;
	while(n <= l){
		b[n] = scratch[n];
		n++;
	}
	FREE(scratch);
}
/*
 * prune: measure the common prefix (pref) and common suffix (suff) of the
 * two files by comparing line hashes, then point sfile[]/slen[] at the
 * remaining middle part and renumber its lines from 0.
 */
static void
prune(void)
{
	int which, n;

	pref = 0;
	while(pref < len[0] && pref < len[1]
	&& file[0][pref+1].value == file[1][pref+1].value)
		pref++;

	suff = 0;
	while(suff < len[0]-pref && suff < len[1]-pref
	&& file[0][len[0]-suff].value == file[1][len[1]-suff].value)
		suff++;

	for(which = 0; which < 2; which++){
		sfile[which] = file[which] + pref;
		slen[which] = len[which] - pref - suff;
		n = 0;
		while(n <= slen[which]){
			sfile[which][n].serial = n;
			n++;
		}
	}
}
/*
 * equiv: a[1..n] and b[1..m] are the two files sorted by hash value.
 * Replace each a[i].value by the index in b of the first line with the
 * same value, or 0 if there is none.  Then write into c[1..m] the serial
 * numbers of b, with the first member of each run of equal values
 * negated, and terminate the list with c[m+1] = -1.
 */
static void
equiv(struct line *a, int n, struct line *b, int m, int *c)
{
int i, j;
i = j = 1;
/* merge pass over the two sorted arrays */
while(i<=n && j<=m) {
if(a[i].value < b[j].value)
a[i++].value = 0;
else if(a[i].value == b[j].value)
a[i++].value = j;
else
j++;
}
while(i <= n)
a[i++].value = 0;
/* sentinel so the b[j+1] comparison below stops at the end of b */
b[m+1].value = 0;
j = 0;
while(++j <= m) {
c[j] = -b[j].serial;
while(b[j+1].value == b[j].value) {
j++;
c[j] = b[j].serial;
}
}
c[j] = -1;
}
/*
 * newcand: append the candidate (x, y, pred) to clist, growing the array
 * by one entry, and return the index of the new entry.
 */
static int
newcand(int x, int y, int pred)
{
	int slot;

	clist = REALLOC(clist, struct cand, (clen+1));
	slot = clen++;
	clist[slot].x = x;
	clist[slot].y = y;
	clist[slot].pred = pred;
	return slot;
}
/*
 * search: c[0..k] holds indices into clist.  Return k+1 if y is greater
 * than the y of candidate c[k]; otherwise bisect c[0..k] on the
 * candidates' y values and return the position of an entry whose y equals
 * the argument, or one past the last position probed from below.
 */
static int
search(int *c, int k, int y)
{
int i, j, l;
int t;
if(clist[c[k]].y < y) /*quick look for typical case*/
return k+1;
i = 0;
j = k+1;
/* the loop ends once the midpoint l no longer advances past i */
while((l=(i+j)/2) > i) {
t = clist[c[l]].y;
if(t > y)
j = l;
else if(t < y)
i = l;
else
return l;
}
return l+1;
}
/*
 * stone: build the list of k-candidates described in the comment at the
 * top of this file.  a[1..n] gives, for each line of file0, the index
 * into b of its equivalence class (0 for none); b is the member list
 * produced by equiv, in which the first entry of each class is negated;
 * c receives the clist index of the current k-candidate for each k.
 * Returns the largest k reached.
 */
static int
stone(int *a, int n, int *b, int *c)
{
int i, k,y;
int j, l;
int oldc, tc;
int oldl;
k = 0;
c[0] = newcand(0,0,0);
for(i=1; i<=n; i++) {
j = a[i];
if(j==0)
continue;
/* the first member of a class is stored negated */
y = -b[j];
oldl = 0;
oldc = c[0];
/* walk the class; the loop ends when the next entry is not positive */
do {
if(y <= clist[oldc].y)
continue;
l = search(c, k, y);
if(l!=oldl+1)
oldc = c[l-1];
if(l<=k) {
if(clist[c[l]].y <= y)
continue;
/* replace the l-candidate, remembering the old one as predecessor for the next */
tc = c[l];
c[l] = newcand(i,y,oldc);
oldc = tc;
oldl = l;
} else {
/* y lies beyond every candidate: extend the list */
c[l] = newcand(i,y,oldc);
k++;
break;
}
} while((y=b[++j]) > 0);
}
return k;
}
/*
 * unravel: fill in the match vector J.  Lines in the common prefix match
 * themselves, lines in the common suffix match the corresponding line
 * counted from the end of the other file, and everything else starts
 * unmatched (0).  The candidate chain starting at clist[p] is then
 * followed through pred until a candidate with y == 0 is reached, and
 * each pair on it is recorded with the prefix length added back.
 */
static void
unravel(int p)
{
	int n, cur;

	for(n = 0; n <= len[0]; n++){
		if(n <= pref){
			J[n] = n;
			continue;
		}
		if(n > len[0]-suff){
			J[n] = n + len[1] - len[0];
			continue;
		}
		J[n] = 0;
	}
	cur = p;
	while(clist[cur].y != 0){
		J[clist[cur].x + pref] = clist[cur].y + pref;
		cur = clist[cur].pred;
	}
}
/*
 * output: walk the match vector J and call change() for every stretch of
 * lines that is not matched one-to-one.  In mode 'e' the stretches are
 * visited from the end of the file towards the beginning; in all other
 * modes from the beginning.  Queued changes are flushed at the end.
 */
static void
output(void)
{
int m, i0, i1, j0, j1;
m = len[0];
/* sentinels at both ends of J */
J[0] = 0;
J[m+1] = len[1]+1;
if (mode != 'e') {
for (i0 = 1; i0 <= m; i0 = i1+1) {
/* skip lines that continue the current run of matches */
while (i0 <= m && J[i0] == J[i0-1]+1)
i0++;
j0 = J[i0-1]+1;
i1 = i0-1;
/* extend over the unmatched lines of the first file */
while (i1 < m && J[i1+1] == 0)
i1++;
j1 = J[i1+1]-1;
J[i1] = j1;
change(i0, i1, j0, j1);
}
}
else {
for (i0 = m; i0 >= 1; i0 = i1-1) {
while (i0 >= 1 && J[i0] == J[i0+1]-1 && J[i0])
i0--;
j0 = J[i0+1]-1;
i1 = i0+1;
while (i1 > 1 && J[i1-1] == 0)
i1--;
j1 = J[i1-1]+1;
J[i1] = j1;
change(i1 , i0, j1, j0);
}
}
/* empty first file: the whole second file is one addition */
if (m == 0)
change(1, 0, 1, len[1]);
flushchanges();
}
#define BUF 4096
/*
 * cmp: compare the files behind b1 and b2 byte for byte, reading through
 * their file descriptors from offset 0.  Returns 0 when the contents are
 * identical and 1 when they differ.
 */
static int
cmp(Biobuf* b1, Biobuf* b2)
{
int n;
uchar buf1[BUF], buf2[BUF];
int f1, f2;
/* NOTE(review): nc is accumulated below but its value is never read */
vlong nc = 1;
uchar *b1s, *b1e, *b2s, *b2e;
f1 = Bfildes(b1);
f2 = Bfildes(b2);
seek(f1, 0, 0);
seek(f2, 0, 0);
/* [b1s, b1e) and [b2s, b2e) are the bytes read but not yet compared */
b1s = b1e = buf1;
b2s = b2e = buf2;
for(;;){
/* refill a buffer once all of its bytes have been compared */
if(b1s >= b1e){
if(b1s >= &buf1[BUF])
b1s = buf1;
n = read(f1, b1s, &buf1[BUF] - b1s);
b1e = b1s + n;
}
if(b2s >= b2e){
if(b2s >= &buf2[BUF])
b2s = buf2;
n = read(f2, b2s, &buf2[BUF] - b2s);
b2e = b2s + n;
}
/* compare as many bytes as both sides currently have */
n = b2e - b2s;
if(n > b1e - b1s)
n = b1e - b1s;
if(n <= 0)
break;
if(memcmp((void *)b1s, (void *)b2s, n) != 0){
return 1;
}
nc += n;
b1s += n;
b2s += n;
}
/* one side ran dry: equal only if the other has nothing left either */
if(b1e - b1s == b2e - b2s)
return 0;
return 1;
}
/*
 * diffreg: compare the two regular files f and t and print their
 * differences in the format selected by the global mode.
 *
 * Both files are loaded with prepare().  If either is judged binary they
 * are compared byte for byte with cmp() and only a one-line notice is
 * printed.  Otherwise the match vector J is computed (prune, sort, equiv,
 * unsort, stone, unravel), verified against the real text by check(), and
 * turned into output by output().
 *
 * prepare() only allocates file[0] when the first file was not judged
 * binary.  bin0 records that, so file[0] is freed exactly when it was
 * allocated for this comparison: never a stale pointer left over from an
 * earlier call, and never leaked when only the second file is binary.
 * file[1] is never allocated on the paths that return early.
 */
void
diffreg(char *f, char *t)
{
	Biobuf *b0, *b1;
	int k;
	int bin0;

	binary = 0;
	b0 = prepare(0, f);
	if (!b0)
		return;
	bin0 = binary;
	b1 = prepare(1, t);
	if (!b1) {
		if (!bin0)
			FREE(file[0]);
		Bterm(b0);
		return;
	}
	if (binary){
		/* could use b0 and b1 but this is simpler. */
		if (cmp(b0, b1))
			print("binary files %s %s differ\n", f, t);
		if (!bin0)
			FREE(file[0]);
		Bterm(b0);
		Bterm(b1);
		return;
	}
	clen = 0;
	prune();
	sort(sfile[0], slen[0]);
	sort(sfile[1], slen[1]);

	/* member and class reuse the storage of file[1] and file[0] */
	member = (int *)file[1];
	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
	member = REALLOC(member, int, slen[1]+2);

	class = (int *)file[0];
	unsort(sfile[0], slen[0], class);
	class = REALLOC(class, int, slen[0]+2);

	klist = MALLOC(int, slen[0]+2);
	clist = MALLOC(struct cand, 1);
	k = stone(class, slen[0], member, klist);
	FREE(member);
	FREE(class);

	J = MALLOC(int, len[0]+2);
	unravel(klist[k]);
	FREE(clist);
	FREE(klist);

	ixold = MALLOC(long, len[0]+2);
	ixnew = MALLOC(long, len[1]+2);
	Bseek(b0, 0, 0); Bseek(b1, 0, 0);
	check(b0, b1);
	output();
	FREE(J); FREE(ixold); FREE(ixnew);
	Bterm(b0); Bterm(b1);	/* ++++ */
}

270
diff/main.c Normal file
View File

@@ -0,0 +1,270 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
#include "diff.h"
/* true when the Dir describes a directory */
#define DIRECTORY(s)		((s)->qid.type&QTDIR)
/* true when the Dir has type 'M' and is not a directory */
#define REGULAR_FILE(s)		((s)->type == 'M' && !DIRECTORY(s))

Biobuf	stdout;

/*
 * Templates for the temporary copies made by mktmpfile.  mkstemp rewrites
 * its template in place, so these must be writable arrays: pointing at
 * string literals, as this used to, makes mkstemp write into read-only
 * storage.
 */
static char tmp[2][sizeof "/tmp/diff1XXXXXXXXXXX"] = {
	"/tmp/diff1XXXXXXXXXXX",
	"/tmp/diff2XXXXXXXXXXX"
};
/* number of entries of tmp currently in use */
static int whichtmp;
static char *progname;
static char usage[] = "diff [ -acefmnbwr ] file1 ... file2\n";
/*
 * rmtmpfiles: remove every temporary file handed out so far, most recent
 * first, leaving whichtmp at 0.
 */
static void
rmtmpfiles(void)
{
	while (whichtmp > 0)
		remove(tmp[--whichtmp]);
}
/*
 * done: remove the temporary files and exit.  Status 0 exits with an
 * empty string, 1 with "some", anything else with "error".
 */
void
done(int status)
{
	rmtmpfiles();
	if (status == 0)
		exits("");
	if (status == 1)
		exits("some");
	exits("error");
	/*NOTREACHED*/
}
/*
 * panic: flush pending standard output, then print "progname: " followed
 * by the formatted message on file descriptor 2.  A non-zero status ends
 * the program through done(status); status 0 returns to the caller.
 */
void
panic(int status, char *fmt, ...)
{
	va_list ap;

	Bflush(&stdout);
	fprint(2, "%s: ", progname);
	va_start(ap, fmt);
	vfprint(2, fmt, ap);
	va_end(ap);
	if (status == 0)
		return;
	done(status);
	/*NOTREACHED*/
}
/*
 * catch: note handler installed by mktmpfile.  Reports the note text and
 * exits with status 2 through panic, which also removes the temporary
 * files.  The text is printed through "%s" so that a '%' inside it is
 * not interpreted as a format directive.
 */
static int
catch(void *a, char *msg)
{
	USED(a);
	panic(2, "%s", msg);
	return 1;
}
/*
 * mkpathname: store "path/name" in pathname.  Returns 0 on success, or 1
 * after reporting an error when the result would not fit.
 *
 * The callers in this file pass buffers of MAXPATHLEN+1 bytes.  The
 * result needs strlen(path) + 1 + strlen(name) bytes plus the terminating
 * NUL, so the two lengths together must stay below MAXPATHLEN.
 */
int
mkpathname(char *pathname, char *path, char *name)
{
	if (strlen(path) + strlen(name) >= MAXPATHLEN) {
		panic(0, "pathname %s/%s too long\n", path, name);
		return 1;
	}
	sprint(pathname, "%s/%s", path, name);
	return 0;
}
/*
 * mktmpfile: copy everything readable from descriptor `input` into a new
 * temporary file created from the next entry of tmp[].  On success *sb
 * receives the dirfstat result of the copy and the file name is
 * returned; on failure an error is reported through panic and 0 is
 * returned.
 */
static char *
mktmpfile(int input, Dir **sb)
{
int fd, i;
char *p;
char buf[8192];
/* install catch() as the note handler; it exits via panic, which removes the temporary files */
atnotify(catch, 1);
/*
p = mktemp(tmp[whichtmp++]);
fd = create(p, OWRITE, 0600);
*/
fd = mkstemp(p=tmp[whichtmp++]);
if (fd < 0) {
panic(mflag ? 0: 2, "cannot create %s: %r\n", p);
return 0;
}
/* i ends up negative if either the read or the write failed */
while ((i = read(input, buf, sizeof(buf))) > 0) {
if ((i = write(fd, buf, i)) < 0)
break;
}
*sb = dirfstat(fd);
close(fd);
if (i < 0) {
panic(mflag ? 0: 2, "cannot read/write %s: %r\n", p);
return 0;
}
return p;
}
/*
 * statfile: stat `file` and return the name diff should actually read.
 *   - "-" (when no file of that name can be statted) means standard
 *     input, which is copied to a temporary file;
 *   - anything that is neither a regular file nor a directory is opened
 *     and copied to a temporary file;
 *   - regular files and directories are used as they are.
 * *sb receives the Dir describing the file that will be read.  Returns 0
 * after reporting an error through panic.
 */
static char *
statfile(char *file, Dir **sb)
{
Dir *dir;
int input;
dir = dirstat(file);
if(dir == nil) {
/* only "-" may fail the stat, and then descriptor 0 must be usable */
if (strcmp(file, "-") || (dir = dirfstat(0)) == nil) {
panic(mflag ? 0: 2, "cannot stat %s: %r\n", file);
return 0;
}
free(dir);
return mktmpfile(0, sb);
}
else if (!REGULAR_FILE(dir) && !DIRECTORY(dir)) {
free(dir);
if ((input = open(file, OREAD)) == -1) {
panic(mflag ? 0: 2, "cannot open %s: %r\n", file);
return 0;
}
file = mktmpfile(input, sb);
close(input);
}
else
*sb = dir;
return file;
}
/*
 * diff: compare f with t.
 *   - two directories: descend with diffdir when rflag is set or at
 *     level 0, otherwise just report the common subdirectory;
 *   - two regular files: diffreg;
 *   - one of each: compare the regular file with the file of the same
 *     base name inside the directory.
 * Temporary files created by statfile are removed before returning.
 */
void
diff(char *f, char *t, int level)
{
char *fp, *tp, *p, fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
Dir *fsb, *tsb;
if ((fp = statfile(f, &fsb)) == 0)
goto Return;
if ((tp = statfile(t, &tsb)) == 0){
free(fsb);
goto Return;
}
if (DIRECTORY(fsb) && DIRECTORY(tsb)) {
if (rflag || level == 0)
diffdir(fp, tp, level);
else
Bprint(&stdout, "Common subdirectories: %s and %s\n",
fp, tp);
}
else if (REGULAR_FILE(fsb) && REGULAR_FILE(tsb))
diffreg(fp, tp);
else {
if (REGULAR_FILE(fsb)) {
/* p: last path element of f, looked up inside the directory tp */
if ((p = utfrrune(f, '/')) == 0)
p = f;
else
p++;
if (mkpathname(tb, tp, p) == 0)
diffreg(fp, tb);
}
else {
/* p: last path element of t, looked up inside the directory fp */
if ((p = utfrrune(t, '/')) == 0)
p = t;
else
p++;
if (mkpathname(fb, fp, p) == 0)
diffreg(fb, tp);
}
}
free(fsb);
free(tsb);
Return:
rmtmpfiles();
}
/*
 * main: parse the option letters, check the file arguments, and compare
 * every argument but the last against the last one.  The exit status
 * reflects whether any change was reported.
 */
void
main(int argc, char *argv[])
{
char *p;
int i;
Dir *fsb, *tsb;
extern int _p9usepwlibrary;
_p9usepwlibrary = 0;
Binit(&stdout, 1, OWRITE);
progname = *argv;
/* options: arguments that start with '-' and are longer than "-" itself */
while (--argc && (*++argv)[0] == '-' && (*argv)[1]) {
for (p = *argv+1; *p; p++) {
switch (*p) {
case 'e':
case 'f':
case 'n':
case 'c':
case 'a':
/* output format; the letter itself is stored in mode */
mode = *p;
break;
case 'w':
bflag = 2;
break;
case 'b':
bflag = 1;
break;
case 'r':
rflag = 1;
mflag = 1;
break;
case 'm':
mflag = 1;
break;
case 'h':
default:
/* makes panic print "Usage: " in front of the usage text */
progname = "Usage";
panic(2, usage);
}
}
}
if (argc < 2)
panic(2, usage, progname);
if ((tsb = dirstat(argv[argc-1])) == nil)
panic(2, "can't stat %s\n", argv[argc-1]);
if (argc > 2) {
/* several sources are only allowed when the last argument is a directory */
if (!DIRECTORY(tsb))
panic(2, usage, progname);
mflag = 1;
}
else {
if ((fsb = dirstat(argv[0])) == nil)
panic(2, "can't stat %s\n", argv[0]);
if (DIRECTORY(fsb) && DIRECTORY(tsb))
mflag = 1;
free(fsb);
}
free(tsb);
for (i = 0; i < argc-1; i++)
diff(argv[i], argv[argc-1], 0);
done(anychange);
/*NOTREACHED*/
}
static char noroom[] = "out of memory - try diff -h\n";

/*
 * emalloc: malloc that does not return on failure; running out of memory
 * is reported through panic with status 2.
 */
void *
emalloc(unsigned n)
{
	void *mem;

	mem = malloc(n);
	if (mem == 0)
		panic(2, noroom);
	return mem;
}
/*
 * erealloc: realloc that does not return on failure; running out of
 * memory is reported through panic with status 2.
 */
void *
erealloc(void *p, unsigned n)
{
	void *mem;

	mem = realloc(p, n);
	if (mem == 0)
		panic(2, noroom);
	return mem;
}

10
join/Makefile Normal file
View File

@@ -0,0 +1,10 @@
# join - join unix port from plan9
# Depends on ../lib9
TARG = join
include ../std.mk
pre-uninstall:
post-install:

147
join/join.1 Normal file
View File

@@ -0,0 +1,147 @@
.TH JOIN 1
.CT 1 files
.SH NAME
join \- relational database operator
.SH SYNOPSIS
.B join
[
.I options
]
.I file1 file2
.SH DESCRIPTION
.I Join
forms, on the standard output,
a join
of the two relations specified by the lines of
.I file1
and
.IR file2 .
If one of the file names is
.LR - ,
the standard input is used.
.PP
.I File1
and
.I file2
must be sorted in increasing
.SM ASCII
collating
sequence on the fields
on which they are to be joined,
normally the first in each line.
.PP
There is one line in the output
for each pair of lines in
.I file1
and
.I file2
that have identical join fields.
The output line normally consists of the common field,
then the rest of the line from
.IR file1 ,
then the rest of the line from
.IR file2 .
.PP
Input fields are normally separated by spaces or tabs;
output fields by space.
In this case, multiple separators count as one, and
leading separators are discarded.
.PP
The following options are recognized, with POSIX syntax.
.TP
.BI -a " n
In addition to the normal output,
produce a line for each unpairable line in file
.IR n ,
where
.I n
is 1 or 2.
.TP
.BI -v " n
Like
.BR -a ,
omitting output for paired lines.
.TP
.BI -e " s
Replace empty output fields by string
.IR s .
.TP
.BI -1 " m
.br
.ns
.TP
.BI -2 " m
Join on the
.IR m th
field of
.I file1
or
.IR file2 .
.TP
.BI -j "n m"
Archaic equivalent for
.BI - n " m"\f1.
.TP
.BI -o fields
Each output line comprises the designated fields.
The comma-separated field designators are either
.BR 0 ,
meaning the join field, or have the form
.IR n . m ,
where
.I n
is a file number and
.I m
is a field number.
Archaic usage allows separate arguments for field designators.
.PP
.TP
.BI -t c
Use character
.I c
as the only separator (tab character) on input and output.
Every appearance of
.I c
in a line is significant.
.SH EXAMPLES
.TP
.L
sort /etc/passwd | join -t: -1 1 -a 1 -e "" - bdays
Add birthdays to the
.B /etc/passwd
file, leaving unknown
birthdays empty.
The layout of
.B /etc/passwd
is given in
.IR passwd (5);
.B bdays
contains sorted lines like
.LR "ken:Feb\ 4,\ 1953" .
.TP
.L
tr : ' ' </etc/passwd | sort -k 3 3 >temp
.br
.ns
.TP
.L
join -1 3 -2 3 -o 1.1,2.1 temp temp | awk '$1 < $2'
Print all pairs of users with identical userids.
.SH SOURCE
.B \*9/src/cmd/join.c
.SH "SEE ALSO"
.IR sort (1),
.IR comm (1),
.IR awk (1)
.SH BUGS
With default field separation,
the collating sequence is that of
.BI "sort -b"
.BI -k y , y\f1;
with
.BR -t ,
the sequence is that of
.BI "sort -t" x
.BI -k y , y\f1.
.PP
One of the files must be randomly accessible.

369
join/join.c Normal file
View File

@@ -0,0 +1,369 @@
/* join F1 F2 on stuff */
#include <u.h>
#include <libc.h>
#include <stdio.h>
#include <ctype.h>
/* file selectors: F1 and F2 index f[], buf[] and ppi[]; F0 marks "the join field" in olistf[] */
#define F1 0
#define F2 1
#define F0 3
#define NFLD 100 /* max field per line */
/* compare the join fields of the current lines of the two files */
#define comp() runecmp(ppi[F1][j1],ppi[F2][j2])
FILE *f[2];
Rune buf[2][BUFSIZ]; /*input lines */
Rune *ppi[2][NFLD+1]; /* pointers to fields in lines */
Rune *s1,*s2;
/* j1 and j2 are renamed, presumably to avoid clashing with the math library functions of the same names */
#define j1 joinj1
#define j2 joinj2
int j1 = 1; /* join of this field of file 1 */
int j2 = 1; /* join of this field of file 2 */
int olist[2*NFLD]; /* output these fields */
int olistf[2*NFLD]; /* from these files */
int no; /* number of entries in olist */
Rune sep1 = ' '; /* default field separator */
Rune sep2 = '\t';
char *sepstr=" ";
int discard; /* count of truncated lines */
/* replacement for empty output fields, set by -e */
Rune null[BUFSIZ]/* = L""*/;
/* set by -a 1 and -a 2: also print unpairable lines of that file */
int a1;
int a2;
char *getoptarg(int*, char***);
void output(int, int);
int input(int);
void oparse(char*);
void error(char*, char*);
void seek1(void), seek2(void);
Rune *strtorune(Rune *, char *);
/*
 * main: parse the options, open the two input files ("-" means standard
 * input) and run the join with whichever file can be repositioned.
 */
void
main(int argc, char **argv)
{
int i;
/* options are consumed by hand; argv[1] is always the next argument */
while (argc > 1 && argv[1][0] == '-') {
/* a lone "-" is a file name, not an option */
if (argv[1][1] == '\0')
break;
switch (argv[1][1]) {
case '-':
argc--;
argv++;
goto proceed;
case 'a':
switch(*getoptarg(&argc, &argv)) {
case '1':
a1++;
break;
case '2':
a2++;
break;
default:
error("incomplete option -a","");
}
break;
case 'e':
strtorune(null, getoptarg(&argc, &argv));
break;
case 't':
sepstr=getoptarg(&argc, &argv);
chartorune(&sep1, sepstr);
sep2 = sep1;
break;
case 'o':
/* modern form: one comma-separated list, attached or as the next argument */
if(argv[1][2]!=0 ||
argc>2 && strchr(argv[2],',')!=0)
oparse(getoptarg(&argc, &argv));
/* archaic form: each field designator is a separate argument */
else for (no = 0; no<2*NFLD && argc>2; no++){
if (argv[2][0] == '1' && argv[2][1] == '.') {
olistf[no] = F1;
olist[no] = atoi(&argv[2][2]);
} else if (argv[2][0] == '2' && argv[2][1] == '.') {
olist[no] = atoi(&argv[2][2]);
olistf[no] = F2;
} else if (argv[2][0] == '0')
olistf[no] = F0;
else
break;
argc--;
argv++;
}
break;
case 'j':
/* -j1 m, -j2 m, or -j m for both files */
if(argc <= 2)
break;
if (argv[1][2] == '1')
j1 = atoi(argv[2]);
else if (argv[1][2] == '2')
j2 = atoi(argv[2]);
else
j1 = j2 = atoi(argv[2]);
argc--;
argv++;
break;
case '1':
j1 = atoi(getoptarg(&argc, &argv));
break;
case '2':
j2 = atoi(getoptarg(&argc, &argv));
break;
}
argc--;
argv++;
}
proceed:
/* check the field numbers and convert them to 0 origin in the same pass */
for (i = 0; i < no; i++)
if (olist[i]-- > NFLD) /* 0 origin */
error("field number too big in -o","");
if (argc != 3)
error("usage: join [-1 x -2 y] [-o list] file1 file2","");
j1--;
j2--; /* everyone else believes in 0 origin */
s1 = ppi[F1][j1];
s2 = ppi[F2][j2];
if (strcmp(argv[1], "-") == 0)
f[F1] = stdin;
else if ((f[F1] = fopen(argv[1], "r")) == 0)
error("can't open %s", argv[1]);
if(strcmp(argv[2], "-") == 0) {
f[F2] = stdin;
} else if ((f[F2] = fopen(argv[2], "r")) == 0)
error("can't open %s", argv[2]);
/* ftell succeeding is taken as the sign that a file can be repositioned */
if(ftell(f[F2]) >= 0)
seek2();
else if(ftell(f[F1]) >= 0)
seek1();
else
error("neither file is randomly accessible","");
if (discard)
error("some input line was truncated", "");
exits("");
}
/*
 * runecmp: compare two NUL-terminated rune strings.  Returns 0 when they
 * are equal, -1 when a sorts before b and 1 otherwise.
 */
int runecmp(Rune *a, Rune *b){
	for(; *a == *b; a++, b++)
		if(*a == '\0')
			return 0;
	return *a < *b ? -1 : 1;
}
/*
 * runetostr: convert the NUL-terminated rune string r to a UTF string
 * in buf and return buf.
 */
char *runetostr(char *buf, Rune *r){
	char *out;

	out = buf;
	while(*r != 0){
		out += runetochar(out, r);
		r++;
	}
	*out = '\0';
	return buf;
}
/*
 * strtorune: convert the UTF string s to a NUL-terminated rune string
 * in buf and return buf.
 */
Rune *strtorune(Rune *buf, char *s){
	Rune *out;

	out = buf;
	while(*s != '\0'){
		s += chartorune(out, s);
		out++;
	}
	*out = '\0';
	return buf;
}
/* lazy. there ought to be a clean way to combine seek1 & seek2 */
#define get1() n1=input(F1)
#define get2() n2=input(F2)
/*
 * seek2: perform the join when file 2 can be repositioned.  n1 and n2
 * hold the field counts of the current lines (0 at end of file).  When
 * the join fields match, the run of equal lines in file 2 is printed
 * against the current line of file 1, and file 2 is then rewound to the
 * start of the run (bot2) for every further line of file 1 with the same
 * key.
 */
void
seek2(void)
{
int n1, n2;
int top2=0;
int bot2 = ftell(f[F2]);
get1();
get2();
while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
if(n1>0 && n2>0 && comp()>0 || n1==0) {
/* file 2 is behind, or file 1 is exhausted: the file 2 line is unpaired */
if(a2) output(0, n2);
bot2 = ftell(f[F2]);
get2();
} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
/* file 1 is behind, or file 2 is exhausted: the file 1 line is unpaired */
if(a1) output(n1, 0);
get1();
} else /*(n1>0 && n2>0 && comp()==0)*/ {
/* pair the current file 1 line with the whole run in file 2; top2 ends up just past the run */
while(n2>0 && comp()==0) {
output(n1, n2);
top2 = ftell(f[F2]);
get2();
}
fseek(f[F2], bot2, 0);
get2();
get1();
for(;;) {
if(n1>0 && n2>0 && comp()==0) {
output(n1, n2);
get2();
} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
/* end of the run in file 2: rewind it and take the next file 1 line */
fseek(f[F2], bot2, 0);
get2();
get1();
} else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
/* file 1 has moved past the key: continue after the run */
fseek(f[F2], top2, 0);
bot2 = top2;
get2();
break;
}
}
}
}
}
/*
 * seek1: perform the join when only file 1 can be repositioned.  This is
 * the mirror image of seek2: when the join fields match, the run of equal
 * lines in file 1 is printed against the current line of file 2, and
 * file 1 is then rewound to the start of the run (bot1) for every further
 * line of file 2 with the same key.  n1 and n2 hold the field counts of
 * the current lines (0 at end of file).
 */
void
seek1(void)
{
	int n1, n2;
	int top1=0;
	int bot1 = ftell(f[F1]);

	get1();
	get2();
	while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
		if(n1>0 && n2>0 && comp()>0 || n1==0) {
			/* file 2 is behind, or file 1 is exhausted */
			if(a2) output(0, n2);
			get2();
		} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
			/* file 1 is behind, or file 2 is exhausted */
			if(a1) output(n1, 0);
			bot1 = ftell(f[F1]);
			get1();
		} else /*(n1>0 && n2>0 && comp()==0)*/ {
			/*
			 * This loop advances file 1, so it must stop when
			 * file 1 runs out.  input() leaves the previous line
			 * in place at end of file, so testing n2 here (which
			 * never changes inside the loop) would keep comparing
			 * equal and loop forever.
			 */
			while(n1>0 && comp()==0) {
				output(n1, n2);
				top1 = ftell(f[F1]);
				get1();
			}
			fseek(f[F1], bot1, 0);
			get2();
			get1();
			for(;;) {
				if(n1>0 && n2>0 && comp()==0) {
					output(n1, n2);
					get1();
				} else if(n1>0 && n2>0 && comp()>0 || n1==0) {
					/* end of the run in file 1: rewind it and take the next file 2 line */
					fseek(f[F1], bot1, 0);
					get2();
					get1();
				} else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{
					/* file 2 has moved past the key: continue after the run */
					fseek(f[F1], top1, 0);
					bot1 = top1;
					get1();
					break;
				}
			}
		}
	}
}
/*
 * input: read the next line of file n into buf[n], split it in place into
 * NUL-terminated fields and record their start addresses in ppi[n],
 * followed by a 0 entry.  Returns the number of fields, or 0 at end of
 * file.
 */
int
input(int n) /* get input line and split into fields */
{
register int i, c;
Rune *bp;
Rune **pp;
char line[BUFSIZ];
bp = buf[n];
pp = ppi[n];
/* at end of file buf[n] and ppi[n] are left as they were */
if (fgets(line, BUFSIZ, f[n]) == 0)
return(0);
strtorune(bp, line);
i = 0;
do {
i++;
if (sep1 == ' ') /* strip multiples */
while ((c = *bp) == sep1 || c == sep2)
bp++; /* skip blanks */
*pp++ = bp; /* record beginning */
while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
bp++;
*bp++ = '\0'; /* mark end by overwriting blank */
} while (c != '\n' && c != '\0' && i < NFLD-1);
/* the splitting did not stop at a newline: count the line as truncated */
if (c != '\n')
discard++;
*pp = 0;
return(i);
}
/*
 * output: print one joined line.  on1 and on2 are the field counts of the
 * current lines of file 1 and file 2; 0 means that file contributes
 * nothing (an unpaired line of the other file is being printed).
 *
 * Without -o the join field is printed first, followed by the remaining
 * fields of file 1 and then of file 2.  With -o the fields listed in
 * olist/olistf are printed; fields that are missing or empty are replaced
 * by the -e string (null).
 */
void
output(int on1, int on2)	/* print items from olist */
{
	int i;
	Rune *temp;
	char buf[BUFSIZ];

	if (no <= 0) {	/* default case */
		printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2]));
		for (i = 0; i < on1; i++)
			if (i != j1)
				printf("%s%s", sepstr, runetostr(buf, ppi[F1][i]));
		for (i = 0; i < on2; i++)
			if (i != j2)
				printf("%s%s", sepstr, runetostr(buf, ppi[F2][i]));
		printf("\n");
	} else {
		for (i = 0; i < no; i++) {
			if (olistf[i]==F0 && on1>j1)
				temp = ppi[F1][j1];
			else if (olistf[i]==F0 && on2>j2)
				temp = ppi[F2][j2];
			else if (olistf[i]==F0)
				/*
				 * Neither line has a join field.  F0 is not a
				 * valid first index into ppi, so it must not
				 * reach the lookup below.
				 */
				temp = null;
			else {
				temp = ppi[olistf[i]][olist[i]];
				if(olistf[i]==F1 && on1<=olist[i] ||
				   olistf[i]==F2 && on2<=olist[i] ||
				   *temp==0)
					temp = null;
			}
			printf("%s", runetostr(buf, temp));
			if (i == no - 1)
				printf("\n");
			else
				printf("%s", sepstr);
		}
	}
}
/*
 * error: print "join: " followed by the message on standard error and
 * exit.  s1 is used as a printf format with s2 as its only argument, and
 * also as the exit string.
 */
void
error(char *s1, char *s2)
{
	fputs("join: ", stderr);
	fprintf(stderr, s1, s2);
	fputc('\n', stderr);
	exits(s1);
}
/*
 * getoptarg: return the argument of the option in argv[1].  If text
 * follows the option letter that text is the argument; otherwise the next
 * argument is used and *argcp / *argvp are advanced past the option.  A
 * missing argument, or one starting with '-', is an error.
 */
char *
getoptarg(int *argcp, char ***argvp)
{
	char **av;
	int ac;

	av = *argvp;
	ac = *argcp;
	if(av[1][2] != 0)
		return av[1] + 2;
	if(ac <= 2 || av[2][0] == '-')
		error("incomplete option %s", av[1]);
	*argcp = ac - 1;
	*argvp = av + 1;
	return av[2];
}
/*
 * oparse: parse a comma-separated -o list such as "0,1.2,2.10" into
 * olist[] (field numbers, still 1 origin) and olistf[] (F0, F1 or F2),
 * setting no to the number of entries.  Anything else is reported as an
 * invalid list.
 *
 * After a field number has been converted, s is moved to its last digit
 * so that the loop continues behind the whole number.  Without that step
 * every digit after the first would be parsed as a new designator.
 */
void
oparse(char *s)
{
	for (no = 0; no<2*NFLD && *s; no++, s++) {
		switch(*s) {
		case '0':
			olistf[no] = F0;
			break;
		case '1':
		case '2':
			if(s[1] == '.' && isdigit((uchar)s[2])) {
				olistf[no] = *s=='1'? F1: F2;
				olist[no] = atoi(s += 2);
				while(isdigit((uchar)s[1]))
					s++;
				break;
			}	/* fall thru */
		default:
			error("invalid -o list", "");
		}
		if(s[1] == ',')
			s++;
	}
}

View File

@@ -11,7 +11,8 @@ enum
UTFmax = 3, /* maximum bytes per rune */
Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
Runeself = 0x80, /* rune and UTF sequences are the same (<) */
Runeerror = 0xFFFD /* decoding error in UTF */
Runeerror = 0xFFFD, /* decoding error in UTF */
Runemax = 0x10FFFF /* maximum rune value */
};
/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */

10
look/Makefile Normal file
View File

@@ -0,0 +1,10 @@
# look - look unix port from plan9
# Depends on ../lib9
TARG = look
include ../std.mk
pre-uninstall:
post-install:

85
look/look.1 Normal file
View File

@@ -0,0 +1,85 @@
.TH LOOK 1
.SH NAME
look \- find lines in a sorted list
.SH SYNOPSIS
.B look
[
.BI -dfnixt c
]
[
.I string
]
[
.I file
]
.SH DESCRIPTION
.I Look
consults a sorted
.I file
and prints all lines that begin with
.IR string .
It uses binary search.
.PP
The following options are recognized.
Options
.B dfnt
affect comparisons as in
.IR sort (1).
.TP
.B -i
Interactive.
There is no
.I string
argument; instead
.I look
takes lines from the standard input as strings to be looked up.
.TP
.B -x
Exact.
Print only lines of the file whose key matches
.I string
exactly.
.TP
.B -d
`Directory' order:
only letters, digits,
tabs and blanks participate in comparisons.
.TP
.B -f
Fold.
Upper case letters compare equal to lower case.
.TP
.B -n
Numeric comparison with initial string of digits, optional minus sign,
and optional decimal point.
.TP
.BR -t [ \f2c\f1 ]
Character
.I c
terminates the sort key in the
.IR file .
By default, tab terminates the key. If
.I c
is missing the entire line comprises the key.
.PP
If no
.I file
is specified,
.B \*9/lib/words
is assumed, with collating sequence
.BR df .
.SH FILES
.B \*9/lib/words
.SH SOURCE
.B \*9/src/cmd/look.c
.SH "SEE ALSO"
.IR sort (1),
.IR grep (1)
.SH DIAGNOSTICS
The exit status is
.RB `` "not found" ''
if no match is found, and
.RB `` "no dictionary" ''
if
.I file
or the default dictionary cannot be opened.

349
look/look.c Normal file
View File

@@ -0,0 +1,349 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
/* Macros for Rune support of ctype.h-like functions */
/* These replace any ctype.h versions and classify ASCII characters only. */
#undef isupper
#undef islower
#undef isalpha
#undef isdigit
#undef isalnum
#undef isspace
#undef tolower
#define isupper(r) ('A' <= (r) && (r) <= 'Z')
#define islower(r) ('a' <= (r) && (r) <= 'z')
#define isalpha(r) (isupper(r) || islower(r))
#define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF)
#define isdigit(r) ('0' <= (r) && (r) <= '9')
#define isalnum(r) (isalpha(r) || isdigit(r))
#define isspace(r) ((r) == ' ' || (r) == '\t' \
|| (0x0A <= (r) && (r) <= 0x0D))
/* tolower does no range check of its own; rcanon applies it only after isupper(r) */
#define tolower(r) ((r)-'A'+'a')
#define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))
/* capacity, in runes, of entry[] and word[] */
#define WORDSIZ 4000
/* default dictionary; main passes the name through unsharp() */
char *filename = "#9/lib/words";
Biobuf *dfile;
Biobuf bout;
Biobuf bin;
/* option flags, set by -f, -d, -x and -i */
int fold;
int direc;
int exact;
int iflag;
int rev = 1; /*-1 for reverse-ordered file, not implemented*/
/* comparison in use: acomp by default, ncomp with -n */
int (*compare)(Rune*, Rune*);
/* rune that ends the key of a line; changed by -t */
Rune tab = '\t';
/* entry: the current line of the file as read; word: its canonical form from rcanon */
Rune entry[WORDSIZ];
Rune word[WORDSIZ];
/* orig: the string to look up as given; key: its canonical form from rcanon */
Rune key[50], orig[50];
Rune latin_fold_tab[] =
{
/* Table to fold latin 1 characters to ASCII equivalents
based at Rune value 0xc0
À Á Â Ã Ä Å Æ Ç
È É Ê Ë Ì Í Î Ï
Ð Ñ Ò Ó Ô Õ Ö ×
Ø Ù Ú Û Ü Ý Þ ß
à á â ã ä å æ ç
è é ê ë ì í î ï
ð ñ ò ó ô õ ö ÷
ø ù ú û ü ý þ ÿ
*/
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',
};
int locate(void);
int acomp(Rune*, Rune*);
int getword(Biobuf*, Rune *rp, int n);
void torune(char*, Rune*);
void rcanon(Rune*, Rune*);
int ncomp(Rune*, Rune*);
/*
 * main: parse the options, pick the string and the file, and print the
 * lines of the file whose keys start with (or, with -x, equal) the
 * string.  With -i, or when no string argument is given, the strings are
 * read from standard input one per line.
 */
void
main(int argc, char *argv[])
{
int n;
filename = unsharp(filename);
Binit(&bin, 0, OREAD);
Binit(&bout, 1, OWRITE);
compare = acomp;
ARGBEGIN{
case 'd':
direc++;
break;
case 'f':
fold++;
break;
case 'i':
iflag++;
break;
case 'n':
compare = ncomp;
break;
case 't':
chartorune(&tab,ARGF());
break;
case 'x':
exact++;
break;
default:
fprint(2, "%s: bad option %c\n", argv0, ARGC());
fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);
exits("usage");
} ARGEND
if(!iflag){
if(argc >= 1) {
torune(argv[0], orig);
argv++;
argc--;
} else
iflag++;
}
/* no file argument: use the default dictionary with -d and -f in effect */
if(argc < 1) {
direc++;
fold++;
} else
filename = argv[0];
if (!iflag)
rcanon(orig, key);
dfile = Bopen(filename, OREAD);
if(dfile == 0) {
fprint(2, "look: can't open %s\n", filename);
exits("no dictionary");
}
if(!iflag)
if(!locate())
exits("not found");
/* one pass per string; the loop repeats only in interactive mode */
do {
if(iflag) {
Bflush(&bout);
if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
exits(0);
rcanon(orig, key);
if(!locate())
continue;
}
/* locate() left the first candidate line in entry/word */
if (!exact || !acomp(word, key))
Bprint(&bout, "%S\n", entry);
/* print the following lines for as long as they still match */
while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
rcanon(entry, word);
n = compare(key, word);
switch(n) {
case -1:
/* key is a proper prefix of the line: a match unless -x was given */
if(exact)
break;
case 0:
if (!exact || !acomp(word, orig))
Bprint(&bout, "%S\n", entry);
continue;
}
break;
}
} while(iflag);
exits(0);
}
/*
 * locate: find the first line of dfile that matches key.  A binary search
 * over byte offsets narrows the range, then a linear scan from `bot`
 * finds the line.  On success the line is left in entry (and its
 * canonical form in word), the file is positioned just after it, and 1
 * is returned; otherwise 0 is returned.
 */
int
locate(void)
{
vlong top, bot, mid;
int c;
int n;
bot = 0;
top = Bseek(dfile, 0L, 2);
for(;;) {
mid = (top+bot) / 2;
Bseek(dfile, mid, 0);
/* skip to the start of the next line, then note where it begins */
do
c = Bgetrune(dfile);
while(c>=0 && c!='\n');
mid = Boffset(dfile);
if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
break;
rcanon(entry, word);
n = compare(key, word);
switch(n) {
case -2:
case -1:
case 0:
/* key is at or before this line: lower the upper bound, or stop if it cannot move */
if(top <= mid)
break;
top = mid;
continue;
case 1:
case 2:
bot = mid;
continue;
}
break;
}
/* linear scan from the lower bound */
Bseek(dfile, bot, 0);
while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
rcanon(entry, word);
n = compare(key, word);
switch(n) {
case -2:
return 0;
case -1:
if(exact)
return 0;
case 0:
return 1;
case 1:
case 2:
continue;
}
}
return 0;
}
/*
 * acomp(s, t) compares two NUL-terminated rune strings and returns:
 *	-2 if s strictly precedes t
 *	-1 if s is a prefix of t
 *	0 if s is the same as t
 *	1 if t is a prefix of s
 *	2 if t strictly precedes s
 */
int
acomp(Rune *s, Rune *t)
{
	int a, b;

	/* advance over the common prefix */
	while((a = *s) == (b = *t)){
		if(a == 0)
			return 0;
		s++;
		t++;
	}
	if(a == 0)
		return -1;
	if(b == 0)
		return 1;
	return a < b ? -2 : 2;
}
/*
 * torune: convert the UTF string old into runes stored at new, including
 * the terminating NUL.  No bound is applied to the output.
 */
void
torune(char *old, Rune *new)
{
	for(;;){
		old += chartorune(new, old);
		if(*new++ == 0)
			break;
	}
}
/*
 * rcanon: copy the key part of old (everything before the first `tab`
 * rune or the end of the string) to new in canonical form:
 *	- Latin-1 letters with a non-zero entry in latin_fold_tab are
 *	  replaced by that entry;
 *	- with direc set, only letters, digits, blanks and tabs are kept;
 *	- with fold set, upper case ASCII letters become lower case.
 * The result is NUL-terminated.
 */
void
rcanon(Rune *old, Rune *new)
{
	Rune r;

	for(r = *old++; r != 0 && r != tab; r = *old++){
		if(islatin1(r) && latin_fold_tab[r-0xc0] != 0)
			r = latin_fold_tab[r-0xc0];
		if(direc && !isalnum(r) && r != ' ' && r != '\t')
			continue;
		if(fold && isupper(r))
			r = tolower(r);
		*new++ = r;
	}
	*new = 0;
}
/*
 * ncomp: numeric comparison of two rune strings.  Each string consists of
 * optional white space, an optional '-', digits, and an optional '.'
 * followed by more digits.  Returns 0 when the numbers are equal;
 * otherwise the result is +2 or -2 (scaled by rev), or +-2 times the sign
 * of a digit difference.
 */
int
ncomp(Rune *s, Rune *t)
{
Rune *is, *it, *js, *jt;
int a, b;
int ssgn, tsgn;
while(isspace(*s))
s++;
while(isspace(*t))
t++;
/* ssgn and tsgn start at -2*rev and are negated for a leading '-' */
ssgn = tsgn = -2*rev;
if(*s == '-') {
s++;
ssgn = -ssgn;
}
if(*t == '-') {
t++;
tsgn = -tsgn;
}
/* is and it: first rune after the integer digits */
for(is = s; isdigit(*is); is++)
;
for(it = t; isdigit(*it); it++)
;
js = is;
jt = it;
a = 0;
/* same sign: compare the integer parts from the right; a keeps the most significant difference */
if(ssgn == tsgn)
while(it>t && is>s)
if(b = *--it - *--is)
a = b;
/* digits left over on one side decide the result unless they are all zeros */
while(is > s)
if(*--is != '0')
return -ssgn;
while(it > t)
if(*--it != '0')
return tsgn;
if(a)
return sgn(a)*ssgn;
/* integer parts are equal: compare the fractions from the left */
if(*(s=js) == '.')
s++;
if(*(t=jt) == '.')
t++;
if(ssgn == tsgn)
while(isdigit(*s) && isdigit(*t))
if(a = *t++ - *s++)
return sgn(a)*ssgn;
while(isdigit(*s))
if(*s++ != '0')
return -ssgn;
while(isdigit(*t))
if(*t++ != '0')
return tsgn;
return 0;
}
/*
 * getword: read one line of runes from f into rp, replacing the newline
 * by a NUL.  Returns 1 when a complete line was read, and 0 at end of
 * file or on a read error.  If no newline turns up within n runes a
 * message is printed and 0 is returned.
 */
int
getword(Biobuf *f, Rune *rp, int n)
{
	long c;
	int left;

	for(left = n; left > 0; left--){
		c = Bgetrune(f);
		if(c < 0)
			return 0;
		if(c == '\n'){
			*rp = '\0';
			return 1;
		}
		*rp++ = c;
	}
	fprint(2, "Look: word too long. Bailing out.\n");
	return 0;
}

10
pbd/Makefile Normal file
View File

@@ -0,0 +1,10 @@
# pbd - pbd unix port from plan9
# Depends on ../lib9
TARG = pbd
include ../std.mk
pre-uninstall:
post-install:

0
pbd/pbd.1 Normal file
View File

19
pbd/pbd.c Normal file
View File

@@ -0,0 +1,19 @@
#include <u.h>
#include <libc.h>
/*
 * pbd: write the last path component of the working directory to
 * standard output, without a trailing newline.  "???" is written
 * when the working directory cannot be determined.
 */
void
main(void)
{
	char buf[512], *p;

	if(!getwd(buf, sizeof buf))
		p = "???";
	else {
		p = strrchr(buf, '/');
		if(p == nil)
			p = buf;		/* no slash at all: print the whole string */
		else if(p > buf || p[1] != '\0')
			p++;			/* step past the slash, except for a bare "/" */
	}
	write(1, p, strlen(p));
	exits(0);
}

View File

@@ -46,7 +46,7 @@ uninstall:
@${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include -I../lib9 $*.c
clean:
rm -f ${OFILES} ${TARG} y.tab.c y.tab.h
rm -f ${OFILES} ${TARG} y.tab.c y.tab.h x.tab.h
${TARG}: ${OFILES}
@echo LD ${TARG}

10
split/Makefile Normal file
View File

@@ -0,0 +1,10 @@
# split - split unix port from plan9
# Depends on ../lib9
TARG = split
include ../std.mk
pre-uninstall:
post-install:

82
split/split.1 Normal file
View File

@@ -0,0 +1,82 @@
.TH SPLIT 1
.CT 1 files
.SH NAME
split \- split a file into pieces
.SH SYNOPSIS
.B split
[
.I option ...
]
[
.I file
]
.SH DESCRIPTION
.I Split
reads
.I file
(standard input by default)
and writes it in pieces of 1000
lines per output file.
The names of the
output files are
.BR xaa ,
.BR xab ,
and so on to
.BR xzz .
The options are
.TP
.BI -n " n"
Split into
.IR n -line
pieces.
.TP
.BI -l " n"
Synonym for
.B -n
.IR n ,
a nod to Unix's syntax.
.TP
.BI -e " expression"
File divisions occur at each line
that matches a regular
.IR expression ;
see
.IR regexp (7).
Multiple
.B -e
options may appear.
If a subexpression of
.I expression
is contained in parentheses
.BR ( ... ) ,
the output file name is the portion of the
line which matches the subexpression.
.TP
.BI -f " stem"
Use
.I stem
instead of
.B x
in output file names.
.TP
.BI -s " suffix"
Append
.I suffix
to names identified under
.BR -e .
.TP
.B -x
Exclude the matched input line from the output file.
.TP
.B -i
Ignore case in option
.BR -e ;
force output file names (excluding the suffix)
to lower case.
.SH SOURCE
.B \*9/src/cmd/split.c
.SH SEE ALSO
.IR sed (1),
.IR awk (1),
.IR grep (1),
.IR regexp (7)

189
split/split.c Normal file
View File

@@ -0,0 +1,189 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#include <regexp.h>
/* not referenced by any code visible in this file */
char digit[] = "0123456789";
/* text appended to output names taken from an -e subexpression (-s option) */
char *suffix = "";
/* leading part of the generated output names (-f option) */
char *stem = "x";
/* two-letter counter appended to stem; nextfile advances it from "aa" */
char suff[] = "aa";
/* name of the output file that openf creates next */
char name[200];
/* storage for the output stream; openf (re)initialises it for every new file */
Biobuf bout;
/* always points at bout, so it is never nil */
Biobuf *output = &bout;
/* forward declarations of the helpers defined after main */
extern int nextfile(void);
extern int matchfile(Resub*);
extern void openf(void);
extern char *fold(char*,int);
extern void usage(void);
extern void badexp(void);
/*
 * split: copy the input (the named file, or standard input) into a
 * series of output files.
 *
 * Without -e, a new file is started every n lines (default 1000,
 * changed with -n or -l).  With -e, a new file is started at every
 * line matching the regular expression; matchfile picks its name.
 * Only the last -e given is used, because each one overwrites
 * `pattern'.  -x leaves the matching line itself out of the output
 * when matchfile reports that a file was started; -i folds both the
 * pattern and each line to lower case before matching.
 *
 * `opened' records whether an output file has been started.  The
 * global `output' always points at bout and so can never be nil,
 * which made the old "output == 0" test dead code: lines ahead of
 * the first match were written to a Biobuf that had never been
 * initialised.  The same happened to input containing no newline in
 * line-count mode.
 */
void
main(int argc, char *argv[])
{
	Reprog *exp;
	char *pattern = 0;
	int n = 1000;
	char *line;
	int xflag = 0;
	int iflag = 0;
	int opened = 0;		/* has any output file been started yet? */
	Biobuf bin;
	Biobuf *b = &bin;
	char buf[256];

	ARGBEGIN {
	case 'l':
	case 'n':
		n=atoi(EARGF(usage()));
		break;
	case 'e':
		pattern = strdup(EARGF(usage()));
		break;
	case 'f':
		stem = strdup(EARGF(usage()));
		break;
	case 's':
		suffix = strdup(EARGF(usage()));
		break;
	case 'x':
		xflag++;
		break;
	case 'i':
		iflag++;
		break;
	default:
		usage();
		break;
	} ARGEND;

	if(argc < 0 || argc > 1)
		usage();

	if(argc != 0) {
		b = Bopen(argv[0], OREAD);
		if(b == nil) {
			fprint(2, "split: can't open %s: %r\n", argv[0]);
			exits("open");
		}
	} else
		Binit(b, 0, OREAD);

	if(pattern) {
		if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern)))
			badexp();
		while((line=Brdline(b,'\n')) != 0) {
			Resub match[2];

			memset(match, 0, sizeof match);
			line[Blinelen(b)-1] = 0;	/* replace the newline */
			if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,match,2)) {
				/*
				 * matchfile either opens a file or, via nextfile,
				 * refuses only after many files were already opened.
				 */
				opened = 1;
				if(matchfile(match) && xflag)
					continue;
			} else if(!opened) {
				/* lines ahead of the first match go to the first default file */
				nextfile();	/* at most once */
				opened = 1;
			}
			Bwrite(output, line, Blinelen(b)-1);
			Bputc(output, '\n');
		}
	} else {
		int linecnt = n;	/* forces a new file on the first line */

		while((line=Brdline(b,'\n')) != 0) {
			if(++linecnt > n) {
				nextfile();
				linecnt = 1;
				opened = 1;
			}
			Bwrite(output, line, Blinelen(b));
		}

		/*
		 * in case we didn't end with a newline, tack whatever's
		 * left onto the last file (starting one if the input
		 * held no complete line at all)
		 */
		while((n = Bread(b, buf, sizeof(buf))) > 0) {
			if(!opened) {
				nextfile();
				opened = 1;
			}
			Bwrite(output, buf, n);
		}
	}
	if(b != nil)
		Bterm(b);
	exits(0);
}
/*
 * nextfile starts the next default-named output file: stem followed
 * by the two-letter counter suff ("aa", "ab", ... "zz").
 *
 * Returns 1 while files can still be opened.  Once the counter has
 * run past "zz" it prints a diagnostic (once only), opens nothing
 * and returns 0, so the caller keeps writing to the current file.
 *
 * The stem comes from the command line (-f), so its length is
 * checked before it is copied into the fixed-size name buffer.
 */
int
nextfile(void)
{
	static int canopen = 1;

	if(suff[0] > 'z') {
		if(canopen)
			fprint(2, "split: file %szz not split\n",stem);
		canopen = 0;
		return canopen;
	}

	if(strlen(stem) + strlen(suff) >= sizeof name) {
		fprint(2, "split: file name %s%s too long\n", stem, suff);
		exits("name");
	}
	strcpy(name, stem);
	strcat(name, suff);

	/* advance the counter: aa, ab, ..., az, ba, ... */
	if(++suff[1] > 'z')
		suff[1] = 'a', ++suff[0];
	openf();
	return canopen;
}
/*
 * matchfile starts a new output file for a line that matched the
 * -e expression.
 *
 * When the expression has a parenthesised subexpression that took
 * part in the match (match[1] is set), the file is named after the
 * matched text followed by the global suffix, and 1 is returned.
 * Otherwise the next default name is used and nextfile's result is
 * returned.
 *
 * The matched text comes from the input and can be arbitrarily
 * long, so the name is checked against the size of the name buffer
 * before it is copied.
 */
int
matchfile(Resub *match)
{
	if(match[1].s.sp) {
		int len = match[1].e.ep - match[1].s.sp;

		if(len < 0 || len + strlen(suffix) >= sizeof name) {
			fprint(2, "split: output file name too long (%d bytes)\n", len);
			exits("name");
		}
		strncpy(name, match[1].s.sp, len);
		strcpy(name+len, suffix);	/* also terminates the name */
		openf();
		return 1;
	}
	return nextfile();
}
/*
 * openf switches the output stream to a freshly created file called
 * `name'.  Whatever is buffered for the previous file is flushed,
 * and that file's descriptor is closed, before the new one is
 * created.  The program exits with status "create" if the file
 * cannot be created.
 *
 * NOTE(review): on the very first call `output' has not been
 * through Binit yet; presumably Bflush/Bterm on the zeroed Biobuf
 * are harmless - confirm against the bio library.
 */
void
openf(void)
{
	static int fd = 0;	/* descriptor of the current output file; 0 means none yet */

	Bflush(output);
	Bterm(output);
	if(fd > 0)
		close(fd);
	fd = create(name,OWRITE,0666);
	if(fd < 0) {
		/* the diagnostic used to be prefixed "grep:" */
		fprint(2, "split: can't create %s: %r\n", name);
		exits("create");
	}
	Binit(output, fd, OWRITE);
}
char *
fold(char *s, int n)
{
static char *fline;
static int linesize = 0;
char *t;
if(linesize < n+1){
fline = realloc(fline,n+1);
linesize = n+1;
}
for(t=fline; *t++ = tolower((uchar)*s++); )
continue;
/* we assume the 'A'-'Z' only appear as themselves
* in a utf encoding.
*/
return fline;
}
/*
 * usage prints the command synopsis on standard error and exits
 * with status "usage".
 */
void
usage(void)
{
	static char synopsis[] =
		"usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n";

	fprint(2, "%s", synopsis);
	exits("usage");
}
/*
 * badexp reports that the -e expression did not compile and exits
 * with status "bad regular expression".
 */
void
badexp(void)
{
	static char complaint[] = "split: bad regular expression\n";

	fprint(2, "%s", complaint);
	exits("bad regular expression");
}

10
strings/Makefile Normal file
View File

@@ -0,0 +1,10 @@
# strings - strings unix port from plan9
# Depends on ../lib9
TARG = strings
include ../std.mk
pre-uninstall:
post-install:

28
strings/strings.1 Normal file
View File

@@ -0,0 +1,28 @@
.TH STRINGS 1
.SH NAME
strings \- extract printable strings
.SH SYNOPSIS
.B strings
[
.I file ...
]
.SH DESCRIPTION
.I Strings
finds and prints strings containing 6 or more
consecutive printable UTF-encoded characters
in a (typically) binary file, default
standard input.
Printable characters are taken to be
.SM ASCII
characters from blank through tilde (hexadecimal 20 through 7E), inclusive,
and
all other characters from value 00A0 to FFFF.
Strings reports
the decimal offset within the file at which the string starts and the text
of the string. If the string is longer than 70 runes the line is
terminated by three dots and the printing is resumed on the next
line with the offset of the continuation line.
.SH SOURCE
.B \*9/src/cmd/strings.c
.SH SEE ALSO
.IR nm (1)

90
strings/strings.c Normal file
View File

@@ -0,0 +1,90 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
/* input stream of the file currently being scanned; opened and closed by stringit */
Biobuf *fin;
/* buffered standard output, initialised in main */
Biobuf fout;
#define MINSPAN 6 /* Min characters in string */
/* size in runes of stringit's line buffer; a line is broken after BUFSIZE-1 runes */
#define BUFSIZE 70
void stringit(char *);
/* drop any macro named isprint and map the name onto this file's own rune-aware risprint */
#undef isprint
#define isprint risprint
int isprint(Rune);
/*
 * strings: print the printable strings found in each named file,
 * or in standard input when no file is given.  When more than one
 * file is named, each file's output is preceded by "name:".
 *
 * The header is written through the same buffered stream (fout) as
 * the strings themselves.  Writing it with the unbuffered print()
 * let headers overtake string output still sitting in the buffer.
 */
void
main(int argc, char **argv)
{
	int i;

	Binit(&fout, 1, OWRITE);
	if(argc < 2) {
		stringit("/dev/stdin");
		exits(0);
	}

	for(i = 1; i < argc; i++) {
		if(argc > 2)
			Bprint(&fout, "%s:\n", argv[i]);
		stringit(argv[i]);
	}

	exits(0);
}
/*
 * stringit scans the file named str and prints every run of at
 * least MINSPAN printable runes, preceded by the offset at which
 * the run starts.  A run that reaches BUFSIZE-1 runes is printed
 * followed by " ..." and scanning continues with a fresh run.
 * If the file cannot be opened a diagnostic is printed and the
 * function returns without output.
 *
 * A run starts when the buffer is empty (cnt == 0).  The start
 * offset is not used as the "no run yet" marker, because 0 is a
 * valid offset: a string at the very beginning of the file used to
 * be reported as starting at offset 1.
 */
void
stringit(char *str)
{
	long posn, start;
	int cnt = 0;
	long c;

	Rune buf[BUFSIZE];

	if ((fin = Bopen(str, OREAD)) == 0) {
		perror("open");
		return;
	}

	start = 0;
	posn = Boffset(fin);	/* offset of the rune about to be read */
	while((c = Bgetrune(fin)) >= 0) {
		if(isprint(c)) {
			if(cnt == 0)
				start = posn;
			buf[cnt++] = c;
			if(cnt == BUFSIZE-1) {
				/* buffer full: print this piece and start a continuation */
				buf[cnt] = 0;
				Bprint(&fout, "%8ld: %S ...\n", start, buf);
				cnt = 0;
			}
		} else {
			/* a non-printable rune ends the run; short runs are dropped */
			if(cnt >= MINSPAN) {
				buf[cnt] = 0;
				Bprint(&fout, "%8ld: %S\n", start, buf);
			}
			cnt = 0;
		}
		posn = Boffset(fin);
	}

	/* a run may extend to the end of the file */
	if(cnt >= MINSPAN){
		buf[cnt] = 0;
		Bprint(&fout, "%8ld: %S\n", start, buf);
	}
	Bterm(fin);
}
/*
 * isprint reports whether r counts as printable here: ASCII from
 * blank (0x20) up to tilde (0x7e), or any rune above 0xA0.
 * Returns 1 if so, 0 otherwise.
 */
int
isprint(Rune r)
{
	return (r >= ' ' && r < 0x7f) || r > 0xA0;
}

10
unicode/Makefile Normal file
View File

@@ -0,0 +1,10 @@
# unicode - unicode unix port from plan9
# Depends on ../lib9
TARG = unicode
include ../std.mk
pre-uninstall:
post-install:

0
unicode/unicode.1 Normal file
View File

122
unicode/unicode.c Normal file
View File

@@ -0,0 +1,122 @@
#include <u.h>
#include <libc.h>
#include <bio.h>
/* synopsis printed by main when no operands are given */
char usage[] = "unicode { [-t] hex hex ... | hexmin-hexmax ... | [-n] char ... }";
/* characters accepted as the first character of a hexadecimal operand */
char hex[] = "0123456789abcdefABCDEF";
/* set by -n: print code points of the given characters */
int numout = 0;
/* set by -t: chars() prints its characters without a newline after each */
int text = 0;
/* not referenced by the code visible here; the `err' used in range() and chars() is a goto label */
char *err;
/* buffered standard output, initialised in main */
Biobuf bout;
/* the three operating modes; each returns nil on success or an exit status string */
char *range(char*[]);
char *nums(char*[]);
char *chars(char*[]);
/*
 * unicode: choose one of three modes from the flags and the shape
 * of the first operand, run it, and exit with the status string it
 * returns.
 *
 *	- no -n and the first operand contains '-':	range()
 *	- -n, or the first operand does not begin
 *	  with a hex digit:				nums()
 *	- otherwise:					chars()
 *
 * With no operands the usage line is printed and the exit status
 * is "usage".
 */
void
main(int argc, char *argv[])
{
	char *status;

	ARGBEGIN{
	case 'n':
		numout = 1;
		break;
	case 't':
		text = 1;
		break;
	}ARGEND

	Binit(&bout, 1, OWRITE);
	if(argc == 0){
		fprint(2, "usage: %s\n", usage);
		exits("usage");
	}

	if(!numout && utfrune(argv[0], '-'))
		status = range(argv);
	else if(numout || strchr(hex, argv[0][0])==0)
		status = nums(argv);
	else
		status = chars(argv);
	exits(status);
}
/*
 * range prints, for every operand of the form hexmin-hexmax, each
 * code point in that range as "hex character", eight to a line and
 * separated by tabs.
 *
 * Returns nil on success.  On the first malformed operand it prints
 * a diagnostic and returns "bad range".
 *
 * Two checks differ from a plain strchr/strtoul parse:
 *	- strchr(hex, '\0') finds hex's own terminator, so an empty
 *	  bound (as in "0-") has to be rejected explicitly;
 *	- the strtoul results are range-checked as unsigned long
 *	  before being narrowed to int, so oversized values cannot
 *	  wrap round into the valid range.
 */
char*
range(char *argv[])
{
	char *q;
	unsigned long lo, hi;
	int min, max;
	int i;

	while(*argv){
		q = *argv;
		if(*q == '\0' || strchr(hex, *q) == 0){
	err:
			fprint(2, "unicode: bad range %s\n", *argv);
			return "bad range";
		}
		lo = strtoul(q, &q, 16);
		if(lo > Runemax || *q != '-')
			goto err;
		q++;
		if(*q == '\0' || strchr(hex, *q) == 0)
			goto err;
		hi = strtoul(q, &q, 16);
		if(hi > Runemax || hi < lo || *q != 0)
			goto err;

		min = lo;
		max = hi;
		i = 0;
		do{
			Bprint(&bout, "%.4x %C", min, min);
			i++;
			/* newline after every eighth entry and after the last one */
			if(min==max || (i&7)==0)
				Bprint(&bout, "\n");
			else
				Bprint(&bout, "\t");
			min++;
		}while(min<=max);
		argv++;
	}
	return 0;
}
/*
 * nums prints the code point of every character in every operand,
 * one per line, as at least four hexadecimal digits.
 *
 * Returns nil on success.  If chartorune yields 0x80 for a byte
 * that is not itself 0x80, the operand is reported as invalid UTF
 * and "bad utf" is returned.
 * NOTE(review): 0x80 is presumably the library's decoding-error
 * rune (Runeerror) - confirm it matches the lib9 in use.
 */
char*
nums(char *argv[])
{
	char *q;
	Rune r;
	int w;

	for(; *argv; argv++){
		for(q = *argv; *q; q += w){
			w = chartorune(&r, q);
			if(r==0x80 && (q[0]&0xFF)!=0x80){
				fprint(2, "unicode: invalid utf string %s\n", *argv);
				return "bad utf";
			}
			Bprint(&bout, "%.4x\n", r);
		}
	}
	return 0;
}
/*
 * chars prints the character for each hexadecimal code point
 * operand, followed by a newline unless -t (text) was given.
 *
 * Returns nil on success.  On the first malformed operand it prints
 * a diagnostic and returns "bad char".
 *
 * Two checks differ from a plain strchr/strtoul parse:
 *	- strchr(hex, '\0') finds hex's own terminator, so an empty
 *	  operand has to be rejected explicitly rather than being
 *	  printed as U+0000;
 *	- the strtoul result is range-checked as unsigned long before
 *	  being narrowed to int, so oversized values cannot wrap
 *	  round into the valid range.
 */
char*
chars(char *argv[])
{
	char *q;
	unsigned long m;

	while(*argv){
		q = *argv;
		if(*q == '\0' || strchr(hex, *q) == 0){
	err:
			fprint(2, "unicode: bad unicode value %s\n", *argv);
			return "bad char";
		}
		m = strtoul(q, &q, 16);
		if(m > Runemax || *q != 0)
			goto err;
		Bprint(&bout, "%C", (int)m);
		if(!text)
			Bprint(&bout, "\n");
		argv++;
	}
	return 0;
}

10
unutf/Makefile Normal file
View File

@@ -0,0 +1,10 @@
# unutf - unutf unix port from plan9
# Depends on ../lib9
TARG = unutf
include ../std.mk
pre-uninstall:
post-install:

0
unutf/unutf.1 Normal file
View File

20
unutf/unutf.c Normal file
View File

@@ -0,0 +1,20 @@
/*
* stupid little program to pipe unicode chars through
* when converting to non-utf compilers.
*/
#include <u.h>
#include <libc.h>
#include <bio.h>
/* buffered standard input */
Biobuf bin;

/*
 * unutf: read runes from standard input and print each one on its
 * own line using the format "0x%ux", until Bgetrune reports end of
 * input or an error.
 */
void
main(void)
{
	int c;

	Binit(&bin, 0, OREAD);
	for(;;) {
		c = Bgetrune(&bin);
		if(c < 0)
			break;
		print("0x%ux\n", c);
	}
	exits(0);
}