mirror of
https://git.suckless.org/9base
synced 2025-09-02 21:33:48 -07:00
added commands as discussed with Uriel yesterday
This commit is contained in:
53
Makefile
53
Makefile
@@ -2,9 +2,56 @@
|
||||
|
||||
include config.mk
|
||||
|
||||
SUBDIRS = lib9 yacc awk basename bc cal cat cleanname date dc du dd echo ed \
|
||||
factor fortune fmt freq getflags grep hoc ls mk mkdir mtime primes \
|
||||
rc read sha1sum sed seq sleep sort tail tee test touch tr troff uniq
|
||||
SUBDIRS = lib9\
|
||||
yacc\
|
||||
ascii\
|
||||
awk\
|
||||
basename\
|
||||
bc\
|
||||
cal\
|
||||
cat\
|
||||
cleanname\
|
||||
cmp\
|
||||
date\
|
||||
dc\
|
||||
du\
|
||||
dd\
|
||||
diff\
|
||||
echo\
|
||||
ed\
|
||||
factor\
|
||||
fortune\
|
||||
fmt\
|
||||
freq\
|
||||
getflags\
|
||||
grep\
|
||||
hoc\
|
||||
join\
|
||||
look\
|
||||
ls\
|
||||
mk\
|
||||
mkdir\
|
||||
mtime\
|
||||
pbd\
|
||||
primes\
|
||||
rc\
|
||||
read\
|
||||
sha1sum\
|
||||
sed\
|
||||
seq\
|
||||
sleep\
|
||||
sort\
|
||||
split\
|
||||
strings\
|
||||
tail\
|
||||
tee\
|
||||
test\
|
||||
touch\
|
||||
tr\
|
||||
troff\
|
||||
unicode\
|
||||
uniq\
|
||||
unutf\
|
||||
|
||||
all:
|
||||
@echo 9base build options:
|
||||
|
11
TODO
11
TODO
@@ -1,11 +0,0 @@
|
||||
12:13 < uriel> garbeam: add dd and diff too
|
||||
12:13 < uriel> and split
|
||||
12:14 < uriel> (and join)
|
||||
12:15 < uriel> and unutf (which I just noticed, seems to be undocumented, but seems quite useful too)
|
||||
12:15 < uriel> and tcs
|
||||
12:16 < uriel> and strings
|
||||
12:18 < uriel> oh, oh, I'm finding some great bits:
|
||||
12:18 < uriel> look(1), ascii(1) and unicode(1)
|
||||
12:19 < uriel> ok, and cmp(1) is missing too
|
||||
12:23 < uriel> hah! plan9/src/cmd/index/ is really interesting (but not worth including)
|
||||
12:26 < uriel> oh! pbd! what a wonderful discovery, we certainly should add it too
|
10
ascii/Makefile
Normal file
10
ascii/Makefile
Normal file
@@ -0,0 +1,10 @@
|
||||
# ascii - ascii unix port from plan9
|
||||
# Depends on ../lib9
|
||||
|
||||
TARG = ascii
|
||||
|
||||
include ../std.mk
|
||||
|
||||
pre-uninstall:
|
||||
|
||||
post-install:
|
160
ascii/ascii.1
Normal file
160
ascii/ascii.1
Normal file
@@ -0,0 +1,160 @@
|
||||
.TH ASCII 1
|
||||
.SH NAME
|
||||
ascii, unicode \- interpret ASCII, Unicode characters
|
||||
.SH SYNOPSIS
|
||||
.B ascii
|
||||
[
|
||||
.B -8
|
||||
]
|
||||
[
|
||||
.BI -oxdb n
|
||||
]
|
||||
[
|
||||
.B -nct
|
||||
]
|
||||
[
|
||||
.I text
|
||||
]
|
||||
.PP
|
||||
.B unicode
|
||||
[
|
||||
.B -nt
|
||||
]
|
||||
.IB hexmin - hexmax
|
||||
.PP
|
||||
.B unicode
|
||||
[
|
||||
.B -t
|
||||
]
|
||||
.I hex
|
||||
[
|
||||
\&...
|
||||
]
|
||||
.PP
|
||||
.B unicode
|
||||
[
|
||||
.B -n
|
||||
]
|
||||
.I characters
|
||||
.PP
|
||||
.B look
|
||||
.I hex
|
||||
.B \*9/lib/unicode
|
||||
.SH DESCRIPTION
|
||||
.I Ascii
|
||||
prints the
|
||||
.SM ASCII
|
||||
values corresponding to characters and
|
||||
.I vice
|
||||
.IR versa ;
|
||||
under the
|
||||
.B -8
|
||||
option, the
|
||||
.SM ISO
|
||||
Latin-1 extensions (codes 0200-0377) are included.
|
||||
The values are interpreted in a settable numeric base;
|
||||
.B -o
|
||||
specifies octal,
|
||||
.B -d
|
||||
decimal,
|
||||
.B -x
|
||||
hexadecimal (the default), and
|
||||
.BI -b n
|
||||
base
|
||||
.IR n .
|
||||
.PP
|
||||
With no arguments,
|
||||
.I ascii
|
||||
prints a table of the character set in the specified base.
|
||||
Characters of
|
||||
.I text
|
||||
are converted to their
|
||||
.SM ASCII
|
||||
values, one per line. If, however, the first
|
||||
.I text
|
||||
argument is a valid number in the specified base, conversion
|
||||
goes the opposite way.
|
||||
Control characters are printed as two- or three-character mnemonics.
|
||||
Other options are:
|
||||
.TP
|
||||
.B -n
|
||||
Force numeric output.
|
||||
.TP
|
||||
.B -c
|
||||
Force character output.
|
||||
.TP
|
||||
.B -t
|
||||
Convert from numbers to running text; do not interpret
|
||||
control characters or insert newlines.
|
||||
.PP
|
||||
.I Unicode
|
||||
is similar; it converts between
|
||||
.SM UTF
|
||||
and character values from the Unicode Standard (see
|
||||
.IR utf (7)).
|
||||
If given a range of hexadecimal numbers,
|
||||
.I unicode
|
||||
prints a table of the specified Unicode characters \(em their values and
|
||||
.SM UTF
|
||||
representations.
|
||||
Otherwise it translates from
|
||||
.SM UTF
|
||||
to numeric value or vice versa,
|
||||
depending on the appearance of the supplied text;
|
||||
the
|
||||
.B -n
|
||||
option forces numeric output to avoid ambiguity with numeric characters.
|
||||
If converting to
|
||||
.SM UTF ,
|
||||
the characters are printed one per line unless the
|
||||
.B -t
|
||||
flag is set, in which case the output is a single string
|
||||
containing only the specified characters.
|
||||
Unlike
|
||||
.IR ascii ,
|
||||
.I unicode
|
||||
treats no characters specially.
|
||||
.PP
|
||||
The output of
|
||||
.I ascii
|
||||
and
|
||||
.I unicode
|
||||
may be unhelpful if the characters printed are not available in the current font.
|
||||
.PP
|
||||
The file
|
||||
.B \*9/lib/unicode
|
||||
contains a
|
||||
table of characters and descriptions, sorted in hexadecimal order,
|
||||
suitable for
|
||||
.IR look (1)
|
||||
on the lower case
|
||||
.I hex
|
||||
values of characters.
|
||||
.SH EXAMPLES
|
||||
.TP
|
||||
.B "ascii -d"
|
||||
Print the
|
||||
.SM ASCII
|
||||
table base 10.
|
||||
.TP
|
||||
.B "unicode p"
|
||||
Print the hex value of `p'.
|
||||
.TP
|
||||
.B "unicode 2200-22f1"
|
||||
Print a table of miscellaneous mathematical symbols.
|
||||
.TP
|
||||
.B "look 039 \*9/lib/unicode"
|
||||
See the start of the Greek alphabet's encoding in the Unicode Standard.
|
||||
.SH FILES
|
||||
.TP
|
||||
.B \*9/lib/unicode
|
||||
table of characters and descriptions.
|
||||
.SH SOURCE
|
||||
.B \*9/src/cmd/ascii.c
|
||||
.br
|
||||
.B \*9/src/cmd/unicode.c
|
||||
.SH "SEE ALSO"
|
||||
.IR look (1),
|
||||
.IR tcs (1),
|
||||
.IR utf (7),
|
||||
.IR font (7)
|
181
ascii/ascii.c
Normal file
181
ascii/ascii.c
Normal file
@@ -0,0 +1,181 @@
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
|
||||
#define MAXBASE 36
|
||||
|
||||
void usage(void);
|
||||
void put(int);
|
||||
void putn(int, int);
|
||||
void puttext(char *);
|
||||
void putnum(char *);
|
||||
int btoi(char *);
|
||||
int value(int, int);
|
||||
int isnum(char *);
|
||||
|
||||
char *str[256]={
|
||||
"nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
|
||||
"bs ", "ht ", "nl ", "vt ", "np ", "cr ", "so ", "si ",
|
||||
"dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
|
||||
"can", "em ", "sub", "esc", "fs ", "gs ", "rs ", "us ",
|
||||
"sp ", " ! ", " \" ", " # ", " $ ", " % ", " & ", " ' ",
|
||||
" ( ", " ) ", " * ", " + ", " , ", " - ", " . ", " / ",
|
||||
" 0 ", " 1 ", " 2 ", " 3 ", " 4 ", " 5 ", " 6 ", " 7 ",
|
||||
" 8 ", " 9 ", " : ", " ; ", " < ", " = ", " > ", " ? ",
|
||||
" @ ", " A ", " B ", " C ", " D ", " E ", " F ", " G ",
|
||||
" H ", " I ", " J ", " K ", " L ", " M ", " N ", " O ",
|
||||
" P ", " Q ", " R ", " S ", " T ", " U ", " V ", " W ",
|
||||
" X ", " Y ", " Z ", " [ ", " \\ ", " ] ", " ^ ", " _ ",
|
||||
" ` ", " a ", " b ", " c ", " d ", " e ", " f ", " g ",
|
||||
" h ", " i ", " j ", " k ", " l ", " m ", " n ", " o ",
|
||||
" p ", " q ", " r ", " s ", " t ", " u ", " v ", " w ",
|
||||
" x ", " y ", " z ", " { ", " | ", " } ", " ~ ", "del",
|
||||
"x80", "x81", "x82", "x83", "x84", "x85", "x86", "x87",
|
||||
"x88", "x89", "x8a", "x8b", "x8c", "x8d", "x8e", "x8f",
|
||||
"x90", "x91", "x92", "x93", "x94", "x95", "x96", "x97",
|
||||
"x98", "x99", "x9a", "x9b", "x9c", "x9d", "x9e", "x9f",
|
||||
"xa0", " ¡ ", " ¢ ", " £ ", " ¤ ", " ¥ ", " ¦ ", " § ",
|
||||
" ¨ ", " © ", " ª ", " « ", " ¬ ", " ", " ® ", " ¯ ",
|
||||
" ° ", " ± ", " ² ", " ³ ", " ´ ", " µ ", " ¶ ", " · ",
|
||||
" ¸ ", " ¹ ", " º ", " » ", " ¼ ", " ½ ", " ¾ ", " ¿ ",
|
||||
" À ", " Á ", " Â ", " Ã ", " Ä ", " Å ", " Æ ", " Ç ",
|
||||
" È ", " É ", " Ê ", " Ë ", " Ì ", " Í ", " Î ", " Ï ",
|
||||
" Ð ", " Ñ ", " Ò ", " Ó ", " Ô ", " Õ ", " Ö ", " × ",
|
||||
" Ø ", " Ù ", " Ú ", " Û ", " Ü ", " Ý ", " Þ ", " ß ",
|
||||
" à ", " á ", " â ", " ã ", " ä ", " å ", " æ ", " ç ",
|
||||
" è ", " é ", " ê ", " ë ", " ì ", " í ", " î ", " ï ",
|
||||
" ð ", " ñ ", " ò ", " ó ", " ô ", " õ ", " ö ", " ÷ ",
|
||||
" ø ", " ù ", " ú ", " û ", " ü ", " ý ", " þ ", " ÿ "
|
||||
};
|
||||
|
||||
char Ncol[]={
|
||||
0,0,7,5,4,4,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
};
|
||||
|
||||
int nchars=128;
|
||||
int base=16;
|
||||
int ncol;
|
||||
int text=1;
|
||||
int strip=0;
|
||||
Biobuf bin;
|
||||
|
||||
void
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
|
||||
Binit(&bin, 1, OWRITE);
|
||||
ARGBEGIN{
|
||||
case '8':
|
||||
nchars=256; break;
|
||||
case 'x':
|
||||
base=16; break;
|
||||
case 'o':
|
||||
base=8; break;
|
||||
case 'd':
|
||||
base=10; break;
|
||||
case 'b':
|
||||
base=strtoul(EARGF(usage()), 0, 0);
|
||||
if(base<2||base>MAXBASE)
|
||||
usage();
|
||||
break;
|
||||
case 'n':
|
||||
text=0; break;
|
||||
case 't':
|
||||
strip=1;
|
||||
/* fall through */
|
||||
case 'c':
|
||||
text=2; break;
|
||||
default:
|
||||
usage();
|
||||
}ARGEND
|
||||
|
||||
ncol=Ncol[base];
|
||||
if(argc==0){
|
||||
for(i=0;i<nchars;i++){
|
||||
put(i);
|
||||
if((i&7)==7)
|
||||
Bprint(&bin, "|\n");
|
||||
}
|
||||
}else{
|
||||
if(text==1)
|
||||
text=isnum(argv[0]);
|
||||
while(argc--)
|
||||
if(text)
|
||||
puttext(*argv++);
|
||||
else
|
||||
putnum(*argv++);
|
||||
}
|
||||
Bputc(&bin, '\n');
|
||||
exits(0);
|
||||
}
|
||||
void
|
||||
usage(void)
|
||||
{
|
||||
fprint(2, "usage: %s [-8] [-xod | -b8] [-ncst] [--] [text]\n", argv0);
|
||||
exits("usage");
|
||||
}
|
||||
void
|
||||
put(int i)
|
||||
{
|
||||
Bputc(&bin, '|');
|
||||
putn(i, ncol);
|
||||
Bprint(&bin, " %s", str[i]);
|
||||
}
|
||||
char dig[]="0123456789abcdefghijklmnopqrstuvwxyz";
|
||||
void
|
||||
putn(int n, int ndig)
|
||||
{
|
||||
if(ndig==0)
|
||||
return;
|
||||
putn(n/base, ndig-1);
|
||||
Bputc(&bin, dig[n%base]);
|
||||
}
|
||||
void
|
||||
puttext(char *s)
|
||||
{
|
||||
int n;
|
||||
n=btoi(s)&0377;
|
||||
if(strip)
|
||||
Bputc(&bin, n);
|
||||
else
|
||||
Bprint(&bin, "%s\n", str[n]);
|
||||
}
|
||||
void
|
||||
putnum(char *s)
|
||||
{
|
||||
while(*s){
|
||||
putn(*s++&0377, ncol);
|
||||
Bputc(&bin, '\n');
|
||||
}
|
||||
}
|
||||
int
|
||||
btoi(char *s)
|
||||
{
|
||||
int n;
|
||||
n=0;
|
||||
while(*s)
|
||||
n=n*base+value(*s++, 0);
|
||||
return(n);
|
||||
}
|
||||
int
|
||||
value(int c, int f)
|
||||
{
|
||||
char *s;
|
||||
for(s=dig; s<dig+base; s++)
|
||||
if(*s==c)
|
||||
return(s-dig);
|
||||
if(f)
|
||||
return(-1);
|
||||
fprint(2, "%s: bad input char %c\n", argv0, c);
|
||||
exits("bad");
|
||||
return 0; /* to keep ken happy */
|
||||
}
|
||||
/*
 * Return 1 when every character of s is a digit of the current base
 * (an empty string therefore also yields 1), 0 otherwise.
 */
int
isnum(char *s)
{
	for(; *s != '\0'; s++)
		if(value(*s, 1) < 0)
			return 0;
	return 1;
}
|
10
cmp/Makefile
Normal file
10
cmp/Makefile
Normal file
@@ -0,0 +1,10 @@
|
||||
# cmp - cmp unix port from plan9
|
||||
# Depends on ../lib9
|
||||
|
||||
TARG = cmp
|
||||
|
||||
include ../std.mk
|
||||
|
||||
pre-uninstall:
|
||||
|
||||
post-install:
|
57
cmp/cmp.1
Normal file
57
cmp/cmp.1
Normal file
@@ -0,0 +1,57 @@
|
||||
.TH CMP 1
|
||||
.SH NAME
|
||||
cmp \- compare two files
|
||||
.SH SYNOPSIS
|
||||
.B cmp
|
||||
[
|
||||
.B -lsL
|
||||
]
|
||||
.I file1 file2
|
||||
[
|
||||
.I offset1
|
||||
[
|
||||
.I offset2
|
||||
]
|
||||
]
|
||||
.SH DESCRIPTION
|
||||
The two files are
|
||||
compared.
|
||||
A diagnostic results if the contents differ, otherwise
|
||||
there is no output.
|
||||
.PP
|
||||
The options are:
|
||||
.TP
|
||||
.B l
|
||||
Print the byte number (decimal) and the
|
||||
differing bytes (hexadecimal) for each difference.
|
||||
.TP
|
||||
.B s
|
||||
Print nothing for differing files,
|
||||
but set the exit status.
|
||||
.TP
|
||||
.B L
|
||||
Print the line number of the first differing byte.
|
||||
.PP
|
||||
If offsets are given,
|
||||
comparison starts at the designated byte position
|
||||
of the corresponding file.
|
||||
Offsets that begin with
|
||||
.B 0x
|
||||
are hexadecimal;
|
||||
with
|
||||
.BR 0 ,
|
||||
octal; with anything else, decimal.
|
||||
.SH SOURCE
|
||||
.B \*9/src/cmd/cmp.c
|
||||
.SH "SEE ALSO"
|
||||
.IR diff (1)
|
||||
.SH DIAGNOSTICS
|
||||
If a file is inaccessible or missing, the exit status is
|
||||
.LR open .
|
||||
If the files are the same, the exit status is empty (true).
|
||||
If they are the same except that one is longer than the other, the exit status is
|
||||
.LR EOF .
|
||||
Otherwise
|
||||
.I cmp
|
||||
reports the position of the first disagreeing byte and the exit status is
|
||||
.LR differ .
|
112
cmp/cmp.c
Normal file
112
cmp/cmp.c
Normal file
@@ -0,0 +1,112 @@
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
|
||||
#define BUF 65536
|
||||
|
||||
int sflag = 0;
|
||||
int lflag = 0;
|
||||
int Lflag = 0;
|
||||
|
||||
static void usage(void);
|
||||
|
||||
void
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int n, i;
|
||||
uchar *p, *q;
|
||||
uchar buf1[BUF], buf2[BUF];
|
||||
int f1, f2;
|
||||
vlong nc = 1, o, l = 1;
|
||||
char *name1, *name2;
|
||||
uchar *b1s, *b1e, *b2s, *b2e;
|
||||
|
||||
ARGBEGIN{
|
||||
case 's': sflag = 1; break;
|
||||
case 'l': lflag = 1; break;
|
||||
case 'L': Lflag = 1; break;
|
||||
default: usage();
|
||||
}ARGEND
|
||||
if(argc < 2)
|
||||
usage();
|
||||
if((f1 = open(name1 = *argv++, OREAD)) == -1){
|
||||
if(!sflag) perror(name1);
|
||||
exits("open");
|
||||
}
|
||||
if((f2 = open(name2 = *argv++, OREAD)) == -1){
|
||||
if(!sflag) perror(name2);
|
||||
exits("open");
|
||||
}
|
||||
if(*argv){
|
||||
o = strtoll(*argv++, 0, 0);
|
||||
if(seek(f1, o, 0) < 0){
|
||||
if(!sflag) perror("cmp: seek by offset1");
|
||||
exits("seek 1");
|
||||
}
|
||||
}
|
||||
if(*argv){
|
||||
o = strtoll(*argv++, 0, 0);
|
||||
if(seek(f2, o, 0) < 0){
|
||||
if(!sflag) perror("cmp: seek by offset2");
|
||||
exits("seek 2");
|
||||
}
|
||||
}
|
||||
if(*argv)
|
||||
usage();
|
||||
b1s = b1e = buf1;
|
||||
b2s = b2e = buf2;
|
||||
for(;;){
|
||||
if(b1s >= b1e){
|
||||
if(b1s >= &buf1[BUF])
|
||||
b1s = buf1;
|
||||
n = read(f1, b1s, &buf1[BUF] - b1s);
|
||||
b1e = b1s + n;
|
||||
}
|
||||
if(b2s >= b2e){
|
||||
if(b2s >= &buf2[BUF])
|
||||
b2s = buf2;
|
||||
n = read(f2, b2s, &buf2[BUF] - b2s);
|
||||
b2e = b2s + n;
|
||||
}
|
||||
n = b2e - b2s;
|
||||
if(n > b1e - b1s)
|
||||
n = b1e - b1s;
|
||||
if(n <= 0)
|
||||
break;
|
||||
if(memcmp((void *)b1s, (void *)b2s, n) != 0){
|
||||
if(sflag)
|
||||
exits("differ");
|
||||
for(p = b1s, q = b2s, i = 0; i < n; p++, q++, i++) {
|
||||
if(*p == '\n')
|
||||
l++;
|
||||
if(*p != *q){
|
||||
if(!lflag){
|
||||
print("%s %s differ: char %lld",
|
||||
name1, name2, nc+i);
|
||||
print(Lflag?" line %lld\n":"\n", l);
|
||||
exits("differ");
|
||||
}
|
||||
print("%6lld 0x%.2x 0x%.2x\n", nc+i, *p, *q);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(Lflag)
|
||||
for(p = b1s; p < b1e;)
|
||||
if(*p++ == '\n')
|
||||
l++;
|
||||
nc += n;
|
||||
b1s += n;
|
||||
b2s += n;
|
||||
}
|
||||
if(b1e - b1s == b2e - b2s)
|
||||
exits((char *)0);
|
||||
if(!sflag)
|
||||
print("EOF on %s\n", (b1e - b1s > b2e - b2s)? name2 : name1);
|
||||
exits("EOF");
|
||||
}
|
||||
|
||||
/*
 * Print the synopsis on standard error, so it cannot be mistaken for
 * comparison output, and exit with status "usage".
 */
static void
usage(void)
{
	fprint(2, "Usage: cmp [-lsL] file1 file2 [offset1 [offset2] ]\n");
	exits("usage");
}
|
10
dd/Makefile
Normal file
10
dd/Makefile
Normal file
@@ -0,0 +1,10 @@
|
||||
# dd - dd unix port from plan9
|
||||
# Depends on ../lib9
|
||||
|
||||
TARG = dd
|
||||
|
||||
include ../std.mk
|
||||
|
||||
pre-uninstall:
|
||||
|
||||
post-install:
|
660
dd/dd.c
Normal file
660
dd/dd.c
Normal file
@@ -0,0 +1,660 @@
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
|
||||
#define BIG 2147483647
|
||||
#define LCASE (1<<0)
|
||||
#define UCASE (1<<1)
|
||||
#define SWAB (1<<2)
|
||||
#define NERR (1<<3)
|
||||
#define SYNC (1<<4)
|
||||
int cflag;
|
||||
int fflag;
|
||||
char *string;
|
||||
char *ifile;
|
||||
char *ofile;
|
||||
char *ibuf;
|
||||
char *obuf;
|
||||
vlong skip;
|
||||
vlong oseekn;
|
||||
vlong iseekn;
|
||||
vlong count;
|
||||
long files = 1;
|
||||
long ibs = 512;
|
||||
long obs = 512;
|
||||
long bs;
|
||||
long cbs;
|
||||
long ibc;
|
||||
long obc;
|
||||
long cbc;
|
||||
long nifr;
|
||||
long nipr;
|
||||
long nofr;
|
||||
long nopr;
|
||||
long ntrunc;
|
||||
int dotrunc = 1;
|
||||
int ibf;
|
||||
int obf;
|
||||
char *op;
|
||||
int nspace;
|
||||
uchar etoa[256];
|
||||
uchar atoe[256];
|
||||
uchar atoibm[256];
|
||||
|
||||
void flsh(void);
|
||||
int match(char *s);
|
||||
vlong number(long big);
|
||||
void cnull(int cc);
|
||||
void null(int c);
|
||||
void ascii(int cc);
|
||||
void unblock(int cc);
|
||||
void ebcdic(int cc);
|
||||
void ibm(int cc);
|
||||
void block(int cc);
|
||||
void term(void);
|
||||
void stats(void);
|
||||
|
||||
#define iskey(s) ((key[0] == '-') && (strcmp(key+1, s) == 0))
|
||||
|
||||
void
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
void (*conv)(int);
|
||||
char *ip;
|
||||
char *key;
|
||||
int a, c;
|
||||
|
||||
conv = null;
|
||||
for(c=1; c<argc; c++) {
|
||||
key = argv[c++];
|
||||
if(c >= argc){
|
||||
fprint(2, "dd: arg %s needs a value\n", key);
|
||||
exits("arg");
|
||||
}
|
||||
string = argv[c];
|
||||
if(iskey("ibs")) {
|
||||
ibs = number(BIG);
|
||||
continue;
|
||||
}
|
||||
if(iskey("obs")) {
|
||||
obs = number(BIG);
|
||||
continue;
|
||||
}
|
||||
if(iskey("cbs")) {
|
||||
cbs = number(BIG);
|
||||
continue;
|
||||
}
|
||||
if(iskey("bs")) {
|
||||
bs = number(BIG);
|
||||
continue;
|
||||
}
|
||||
if(iskey("if")) {
|
||||
ifile = string;
|
||||
continue;
|
||||
}
|
||||
if(iskey("of")) {
|
||||
ofile = string;
|
||||
continue;
|
||||
}
|
||||
if(iskey("trunc")) {
|
||||
dotrunc = number(BIG);
|
||||
continue;
|
||||
}
|
||||
if(iskey("skip")) {
|
||||
skip = number(BIG);
|
||||
continue;
|
||||
}
|
||||
if(iskey("seek") || iskey("oseek")) {
|
||||
oseekn = number(BIG);
|
||||
continue;
|
||||
}
|
||||
if(iskey("iseek")) {
|
||||
iseekn = number(BIG);
|
||||
continue;
|
||||
}
|
||||
if(iskey("count")) {
|
||||
count = number(BIG);
|
||||
continue;
|
||||
}
|
||||
if(iskey("files")) {
|
||||
files = number(BIG);
|
||||
continue;
|
||||
}
|
||||
if(iskey("conv")) {
|
||||
cloop:
|
||||
if(match(","))
|
||||
goto cloop;
|
||||
if(*string == '\0')
|
||||
continue;
|
||||
if(match("ebcdic")) {
|
||||
conv = ebcdic;
|
||||
goto cloop;
|
||||
}
|
||||
if(match("ibm")) {
|
||||
conv = ibm;
|
||||
goto cloop;
|
||||
}
|
||||
if(match("ascii")) {
|
||||
conv = ascii;
|
||||
goto cloop;
|
||||
}
|
||||
if(match("block")) {
|
||||
conv = block;
|
||||
goto cloop;
|
||||
}
|
||||
if(match("unblock")) {
|
||||
conv = unblock;
|
||||
goto cloop;
|
||||
}
|
||||
if(match("lcase")) {
|
||||
cflag |= LCASE;
|
||||
goto cloop;
|
||||
}
|
||||
if(match("ucase")) {
|
||||
cflag |= UCASE;
|
||||
goto cloop;
|
||||
}
|
||||
if(match("swab")) {
|
||||
cflag |= SWAB;
|
||||
goto cloop;
|
||||
}
|
||||
if(match("noerror")) {
|
||||
cflag |= NERR;
|
||||
goto cloop;
|
||||
}
|
||||
if(match("sync")) {
|
||||
cflag |= SYNC;
|
||||
goto cloop;
|
||||
}
|
||||
}
|
||||
fprint(2, "dd: bad arg: %s\n", key);
|
||||
exits("arg");
|
||||
}
|
||||
if(conv == null && cflag&(LCASE|UCASE))
|
||||
conv = cnull;
|
||||
if(ifile)
|
||||
ibf = open(ifile, 0);
|
||||
else
|
||||
ibf = dup(0, -1);
|
||||
if(ibf < 0) {
|
||||
fprint(2, "dd: open %s: %r\n", ifile);
|
||||
exits("open");
|
||||
}
|
||||
if(ofile){
|
||||
if(dotrunc)
|
||||
obf = create(ofile, 1, 0664);
|
||||
else
|
||||
obf = open(ofile, 1);
|
||||
if(obf < 0) {
|
||||
fprint(2, "dd: create %s: %r\n", ofile);
|
||||
exits("create");
|
||||
}
|
||||
}else{
|
||||
obf = dup(1, -1);
|
||||
if(obf < 0) {
|
||||
fprint(2, "dd: can't dup file descriptor: %s: %r\n", ofile);
|
||||
exits("dup");
|
||||
}
|
||||
}
|
||||
if(bs)
|
||||
ibs = obs = bs;
|
||||
if(ibs == obs && conv == null)
|
||||
fflag++;
|
||||
if(ibs == 0 || obs == 0) {
|
||||
fprint(2, "dd: counts: cannot be zero\n");
|
||||
exits("counts");
|
||||
}
|
||||
ibuf = sbrk(ibs);
|
||||
if(fflag)
|
||||
obuf = ibuf;
|
||||
else
|
||||
obuf = sbrk(obs);
|
||||
sbrk(64); /* For good measure */
|
||||
if(ibuf == (char *)-1 || obuf == (char *)-1) {
|
||||
fprint(2, "dd: not enough memory: %r\n");
|
||||
exits("memory");
|
||||
}
|
||||
ibc = 0;
|
||||
obc = 0;
|
||||
cbc = 0;
|
||||
op = obuf;
|
||||
|
||||
/*
|
||||
if(signal(SIGINT, SIG_IGN) != SIG_IGN)
|
||||
signal(SIGINT, term);
|
||||
*/
|
||||
seek(obf, obs*oseekn, 1);
|
||||
seek(ibf, ibs*iseekn, 1);
|
||||
while(skip) {
|
||||
read(ibf, ibuf, ibs);
|
||||
skip--;
|
||||
}
|
||||
|
||||
ip = 0;
|
||||
loop:
|
||||
if(ibc-- == 0) {
|
||||
ibc = 0;
|
||||
if(count==0 || nifr+nipr!=count) {
|
||||
if(cflag&(NERR|SYNC))
|
||||
for(ip=ibuf+ibs; ip>ibuf;)
|
||||
*--ip = 0;
|
||||
ibc = read(ibf, ibuf, ibs);
|
||||
}
|
||||
if(ibc == -1) {
|
||||
perror("read");
|
||||
if((cflag&NERR) == 0) {
|
||||
flsh();
|
||||
term();
|
||||
}
|
||||
ibc = 0;
|
||||
for(c=0; c<ibs; c++)
|
||||
if(ibuf[c] != 0)
|
||||
ibc = c;
|
||||
stats();
|
||||
}
|
||||
if(ibc == 0 && --files<=0) {
|
||||
flsh();
|
||||
term();
|
||||
}
|
||||
if(ibc != ibs) {
|
||||
nipr++;
|
||||
if(cflag&SYNC)
|
||||
ibc = ibs;
|
||||
} else
|
||||
nifr++;
|
||||
ip = ibuf;
|
||||
c = (ibc>>1) & ~1;
|
||||
if(cflag&SWAB && c)
|
||||
do {
|
||||
a = *ip++;
|
||||
ip[-1] = *ip;
|
||||
*ip++ = a;
|
||||
} while(--c);
|
||||
ip = ibuf;
|
||||
if(fflag) {
|
||||
obc = ibc;
|
||||
flsh();
|
||||
ibc = 0;
|
||||
}
|
||||
goto loop;
|
||||
}
|
||||
c = 0;
|
||||
c |= *ip++;
|
||||
c &= 0377;
|
||||
(*conv)(c);
|
||||
goto loop;
|
||||
}
|
||||
|
||||
void
|
||||
flsh(void)
|
||||
{
|
||||
int c;
|
||||
|
||||
if(obc) {
|
||||
c = write(obf, obuf, obc);
|
||||
if(c != obc) {
|
||||
if(c > 0)
|
||||
++nopr;
|
||||
perror("write");
|
||||
term();
|
||||
}
|
||||
if(obc == obs)
|
||||
nofr++;
|
||||
else
|
||||
nopr++;
|
||||
obc = 0;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
match(char *s)
|
||||
{
|
||||
char *cs;
|
||||
|
||||
cs = string;
|
||||
while(*cs++ == *s)
|
||||
if(*s++ == '\0')
|
||||
goto true;
|
||||
if(*s != '\0')
|
||||
return 0;
|
||||
|
||||
true:
|
||||
cs--;
|
||||
string = cs;
|
||||
return 1;
|
||||
}
|
||||
|
||||
vlong
|
||||
number(long big)
|
||||
{
|
||||
char *cs;
|
||||
vlong n;
|
||||
|
||||
cs = string;
|
||||
n = 0;
|
||||
while(*cs >= '0' && *cs <= '9')
|
||||
n = n*10 + *cs++ - '0';
|
||||
for(;;)
|
||||
switch(*cs++) {
|
||||
|
||||
case 'k':
|
||||
n *= 1024;
|
||||
continue;
|
||||
|
||||
/* case 'w':
|
||||
n *= sizeof(int);
|
||||
continue;
|
||||
*/
|
||||
|
||||
case 'b':
|
||||
n *= 512;
|
||||
continue;
|
||||
|
||||
/* case '*':*/
|
||||
case 'x':
|
||||
string = cs;
|
||||
n *= number(BIG);
|
||||
|
||||
case '\0':
|
||||
if(n>=big || n<0) {
|
||||
fprint(2, "dd: argument %lld out of range\n", n);
|
||||
exits("range");
|
||||
}
|
||||
return n;
|
||||
}
|
||||
/* never gets here */
|
||||
}
|
||||
|
||||
void
|
||||
cnull(int cc)
|
||||
{
|
||||
int c;
|
||||
|
||||
c = cc;
|
||||
if((cflag&UCASE) && c>='a' && c<='z')
|
||||
c += 'A'-'a';
|
||||
if((cflag&LCASE) && c>='A' && c<='Z')
|
||||
c += 'a'-'A';
|
||||
null(c);
|
||||
}
|
||||
|
||||
void
|
||||
null(int c)
|
||||
{
|
||||
|
||||
*op = c;
|
||||
op++;
|
||||
if(++obc >= obs) {
|
||||
flsh();
|
||||
op = obuf;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ascii(int cc)
|
||||
{
|
||||
int c;
|
||||
|
||||
c = etoa[cc];
|
||||
if(cbs == 0) {
|
||||
cnull(c);
|
||||
return;
|
||||
}
|
||||
if(c == ' ') {
|
||||
nspace++;
|
||||
goto out;
|
||||
}
|
||||
while(nspace > 0) {
|
||||
null(' ');
|
||||
nspace--;
|
||||
}
|
||||
cnull(c);
|
||||
|
||||
out:
|
||||
if(++cbc >= cbs) {
|
||||
null('\n');
|
||||
cbc = 0;
|
||||
nspace = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
unblock(int cc)
|
||||
{
|
||||
int c;
|
||||
|
||||
c = cc & 0377;
|
||||
if(cbs == 0) {
|
||||
cnull(c);
|
||||
return;
|
||||
}
|
||||
if(c == ' ') {
|
||||
nspace++;
|
||||
goto out;
|
||||
}
|
||||
while(nspace > 0) {
|
||||
null(' ');
|
||||
nspace--;
|
||||
}
|
||||
cnull(c);
|
||||
|
||||
out:
|
||||
if(++cbc >= cbs) {
|
||||
null('\n');
|
||||
cbc = 0;
|
||||
nspace = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ebcdic(int cc)
|
||||
{
|
||||
int c;
|
||||
|
||||
c = cc;
|
||||
if(cflag&UCASE && c>='a' && c<='z')
|
||||
c += 'A'-'a';
|
||||
if(cflag&LCASE && c>='A' && c<='Z')
|
||||
c += 'a'-'A';
|
||||
c = atoe[c];
|
||||
if(cbs == 0) {
|
||||
null(c);
|
||||
return;
|
||||
}
|
||||
if(cc == '\n') {
|
||||
while(cbc < cbs) {
|
||||
null(atoe[' ']);
|
||||
cbc++;
|
||||
}
|
||||
cbc = 0;
|
||||
return;
|
||||
}
|
||||
if(cbc == cbs)
|
||||
ntrunc++;
|
||||
cbc++;
|
||||
if(cbc <= cbs)
|
||||
null(c);
|
||||
}
|
||||
|
||||
void
|
||||
ibm(int cc)
|
||||
{
|
||||
int c;
|
||||
|
||||
c = cc;
|
||||
if(cflag&UCASE && c>='a' && c<='z')
|
||||
c += 'A'-'a';
|
||||
if(cflag&LCASE && c>='A' && c<='Z')
|
||||
c += 'a'-'A';
|
||||
c = atoibm[c] & 0377;
|
||||
if(cbs == 0) {
|
||||
null(c);
|
||||
return;
|
||||
}
|
||||
if(cc == '\n') {
|
||||
while(cbc < cbs) {
|
||||
null(atoibm[' ']);
|
||||
cbc++;
|
||||
}
|
||||
cbc = 0;
|
||||
return;
|
||||
}
|
||||
if(cbc == cbs)
|
||||
ntrunc++;
|
||||
cbc++;
|
||||
if(cbc <= cbs)
|
||||
null(c);
|
||||
}
|
||||
|
||||
void
|
||||
block(int cc)
|
||||
{
|
||||
int c;
|
||||
|
||||
c = cc;
|
||||
if(cflag&UCASE && c>='a' && c<='z')
|
||||
c += 'A'-'a';
|
||||
if(cflag&LCASE && c>='A' && c<='Z')
|
||||
c += 'a'-'A';
|
||||
c &= 0377;
|
||||
if(cbs == 0) {
|
||||
null(c);
|
||||
return;
|
||||
}
|
||||
if(cc == '\n') {
|
||||
while(cbc < cbs) {
|
||||
null(' ');
|
||||
cbc++;
|
||||
}
|
||||
cbc = 0;
|
||||
return;
|
||||
}
|
||||
if(cbc == cbs)
|
||||
ntrunc++;
|
||||
cbc++;
|
||||
if(cbc <= cbs)
|
||||
null(c);
|
||||
}
|
||||
|
||||
/*
 * Finish the run: print the record statistics, then exit with an
 * empty (success) status.
 */
void
term(void)
{
	stats();
	exits(0);
}
|
||||
|
||||
void
|
||||
stats(void)
|
||||
{
|
||||
|
||||
fprint(2, "%lud+%lud records in\n", nifr, nipr);
|
||||
fprint(2, "%lud+%lud records out\n", nofr, nopr);
|
||||
if(ntrunc)
|
||||
fprint(2, "%lud truncated records\n", ntrunc);
|
||||
}
|
||||
|
||||
uchar etoa[] =
|
||||
{
|
||||
0000,0001,0002,0003,0234,0011,0206,0177,
|
||||
0227,0215,0216,0013,0014,0015,0016,0017,
|
||||
0020,0021,0022,0023,0235,0205,0010,0207,
|
||||
0030,0031,0222,0217,0034,0035,0036,0037,
|
||||
0200,0201,0202,0203,0204,0012,0027,0033,
|
||||
0210,0211,0212,0213,0214,0005,0006,0007,
|
||||
0220,0221,0026,0223,0224,0225,0226,0004,
|
||||
0230,0231,0232,0233,0024,0025,0236,0032,
|
||||
0040,0240,0241,0242,0243,0244,0245,0246,
|
||||
0247,0250,0133,0056,0074,0050,0053,0041,
|
||||
0046,0251,0252,0253,0254,0255,0256,0257,
|
||||
0260,0261,0135,0044,0052,0051,0073,0136,
|
||||
0055,0057,0262,0263,0264,0265,0266,0267,
|
||||
0270,0271,0174,0054,0045,0137,0076,0077,
|
||||
0272,0273,0274,0275,0276,0277,0300,0301,
|
||||
0302,0140,0072,0043,0100,0047,0075,0042,
|
||||
0303,0141,0142,0143,0144,0145,0146,0147,
|
||||
0150,0151,0304,0305,0306,0307,0310,0311,
|
||||
0312,0152,0153,0154,0155,0156,0157,0160,
|
||||
0161,0162,0313,0314,0315,0316,0317,0320,
|
||||
0321,0176,0163,0164,0165,0166,0167,0170,
|
||||
0171,0172,0322,0323,0324,0325,0326,0327,
|
||||
0330,0331,0332,0333,0334,0335,0336,0337,
|
||||
0340,0341,0342,0343,0344,0345,0346,0347,
|
||||
0173,0101,0102,0103,0104,0105,0106,0107,
|
||||
0110,0111,0350,0351,0352,0353,0354,0355,
|
||||
0175,0112,0113,0114,0115,0116,0117,0120,
|
||||
0121,0122,0356,0357,0360,0361,0362,0363,
|
||||
0134,0237,0123,0124,0125,0126,0127,0130,
|
||||
0131,0132,0364,0365,0366,0367,0370,0371,
|
||||
0060,0061,0062,0063,0064,0065,0066,0067,
|
||||
0070,0071,0372,0373,0374,0375,0376,0377,
|
||||
};
|
||||
uchar atoe[] =
|
||||
{
|
||||
0000,0001,0002,0003,0067,0055,0056,0057,
|
||||
0026,0005,0045,0013,0014,0015,0016,0017,
|
||||
0020,0021,0022,0023,0074,0075,0062,0046,
|
||||
0030,0031,0077,0047,0034,0035,0036,0037,
|
||||
0100,0117,0177,0173,0133,0154,0120,0175,
|
||||
0115,0135,0134,0116,0153,0140,0113,0141,
|
||||
0360,0361,0362,0363,0364,0365,0366,0367,
|
||||
0370,0371,0172,0136,0114,0176,0156,0157,
|
||||
0174,0301,0302,0303,0304,0305,0306,0307,
|
||||
0310,0311,0321,0322,0323,0324,0325,0326,
|
||||
0327,0330,0331,0342,0343,0344,0345,0346,
|
||||
0347,0350,0351,0112,0340,0132,0137,0155,
|
||||
0171,0201,0202,0203,0204,0205,0206,0207,
|
||||
0210,0211,0221,0222,0223,0224,0225,0226,
|
||||
0227,0230,0231,0242,0243,0244,0245,0246,
|
||||
0247,0250,0251,0300,0152,0320,0241,0007,
|
||||
0040,0041,0042,0043,0044,0025,0006,0027,
|
||||
0050,0051,0052,0053,0054,0011,0012,0033,
|
||||
0060,0061,0032,0063,0064,0065,0066,0010,
|
||||
0070,0071,0072,0073,0004,0024,0076,0341,
|
||||
0101,0102,0103,0104,0105,0106,0107,0110,
|
||||
0111,0121,0122,0123,0124,0125,0126,0127,
|
||||
0130,0131,0142,0143,0144,0145,0146,0147,
|
||||
0150,0151,0160,0161,0162,0163,0164,0165,
|
||||
0166,0167,0170,0200,0212,0213,0214,0215,
|
||||
0216,0217,0220,0232,0233,0234,0235,0236,
|
||||
0237,0240,0252,0253,0254,0255,0256,0257,
|
||||
0260,0261,0262,0263,0264,0265,0266,0267,
|
||||
0270,0271,0272,0273,0274,0275,0276,0277,
|
||||
0312,0313,0314,0315,0316,0317,0332,0333,
|
||||
0334,0335,0336,0337,0352,0353,0354,0355,
|
||||
0356,0357,0372,0373,0374,0375,0376,0377,
|
||||
};
|
||||
uchar atoibm[] =
|
||||
{
|
||||
0000,0001,0002,0003,0067,0055,0056,0057,
|
||||
0026,0005,0045,0013,0014,0015,0016,0017,
|
||||
0020,0021,0022,0023,0074,0075,0062,0046,
|
||||
0030,0031,0077,0047,0034,0035,0036,0037,
|
||||
0100,0132,0177,0173,0133,0154,0120,0175,
|
||||
0115,0135,0134,0116,0153,0140,0113,0141,
|
||||
0360,0361,0362,0363,0364,0365,0366,0367,
|
||||
0370,0371,0172,0136,0114,0176,0156,0157,
|
||||
0174,0301,0302,0303,0304,0305,0306,0307,
|
||||
0310,0311,0321,0322,0323,0324,0325,0326,
|
||||
0327,0330,0331,0342,0343,0344,0345,0346,
|
||||
0347,0350,0351,0255,0340,0275,0137,0155,
|
||||
0171,0201,0202,0203,0204,0205,0206,0207,
|
||||
0210,0211,0221,0222,0223,0224,0225,0226,
|
||||
0227,0230,0231,0242,0243,0244,0245,0246,
|
||||
0247,0250,0251,0300,0117,0320,0241,0007,
|
||||
0040,0041,0042,0043,0044,0025,0006,0027,
|
||||
0050,0051,0052,0053,0054,0011,0012,0033,
|
||||
0060,0061,0032,0063,0064,0065,0066,0010,
|
||||
0070,0071,0072,0073,0004,0024,0076,0341,
|
||||
0101,0102,0103,0104,0105,0106,0107,0110,
|
||||
0111,0121,0122,0123,0124,0125,0126,0127,
|
||||
0130,0131,0142,0143,0144,0145,0146,0147,
|
||||
0150,0151,0160,0161,0162,0163,0164,0165,
|
||||
0166,0167,0170,0200,0212,0213,0214,0215,
|
||||
0216,0217,0220,0232,0233,0234,0235,0236,
|
||||
0237,0240,0252,0253,0254,0255,0256,0257,
|
||||
0260,0261,0262,0263,0264,0265,0266,0267,
|
||||
0270,0271,0272,0273,0274,0275,0276,0277,
|
||||
0312,0313,0314,0315,0316,0317,0332,0333,
|
||||
0334,0335,0336,0337,0352,0353,0354,0355,
|
||||
0356,0357,0372,0373,0374,0375,0376,0377,
|
||||
};
|
35
diff/Makefile
Normal file
35
diff/Makefile
Normal file
@@ -0,0 +1,35 @@
|
||||
# diff - diff unix port from plan9
|
||||
# Depends on ../lib9
|
||||
|
||||
TARG = diff
|
||||
OFILES = diffdir.o diffio.o diffreg.o main.o
|
||||
MANFILES = diff.1
|
||||
|
||||
include ../config.mk
|
||||
|
||||
all: ${TARG}
|
||||
@strip ${TARG}
|
||||
@echo built ${TARG}
|
||||
|
||||
install: ${TARG}
|
||||
@mkdir -p ${DESTDIR}${PREFIX}/bin
|
||||
@cp -f ${TARG} ${DESTDIR}${PREFIX}/bin/
|
||||
@chmod 755 ${DESTDIR}${PREFIX}/bin/${TARG}
|
||||
@mkdir -p ${DESTDIR}${MANPREFIX}/man1
|
||||
@cp -f ${MANFILES} ${DESTDIR}${MANPREFIX}/man1
|
||||
@chmod 444 ${DESTDIR}${MANPREFIX}/man1/${MANFILES}
|
||||
|
||||
uninstall:
|
||||
rm -f ${DESTDIR}${PREFIX}/bin/${TARG}
|
||||
rm -f ${DESTDIR}${PREFIX}/man1/${MANFILES}
|
||||
|
||||
.c.o:
|
||||
@echo CC $*.c
|
||||
@${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include -I../lib9 $*.c
|
||||
|
||||
clean:
|
||||
rm -f ${OFILES} ${TARG}
|
||||
|
||||
${TARG}: ${OFILES}
|
||||
@echo LD ${TARG}
|
||||
@${CC} ${LDFLAGS} -o ${TARG} ${OFILES} -lm -L${PREFIX}/lib -L../lib9 -l9
|
163
diff/diff.1
Normal file
163
diff/diff.1
Normal file
@@ -0,0 +1,163 @@
|
||||
.TH DIFF 1
|
||||
.SH NAME
|
||||
diff \- differential file comparator
|
||||
.SH SYNOPSIS
|
||||
.B diff
|
||||
[
|
||||
.B -acefmnbwr
|
||||
] file1 ... file2
|
||||
.SH DESCRIPTION
|
||||
.I Diff
|
||||
tells what lines must be changed in two files to bring them
|
||||
into agreement.
|
||||
If one file
|
||||
is a directory,
|
||||
then a file in that directory with basename the same as that of
|
||||
the other file is used.
|
||||
If both files are directories, similarly named files in the
|
||||
two directories are compared by the method of
|
||||
.I diff
|
||||
for text
|
||||
files and
|
||||
.IR cmp (1)
|
||||
otherwise.
|
||||
If more than two file names are given, then each argument is compared
|
||||
to the last argument as above.
|
||||
The
|
||||
.B -r
|
||||
option causes
|
||||
.I diff
|
||||
to process similarly named subdirectories recursively.
|
||||
When processing more than one file,
|
||||
.I diff
|
||||
prefixes file differences with a single line
|
||||
listing the two differing files, in the form of
|
||||
a
|
||||
.I diff
|
||||
command line.
|
||||
The
|
||||
.B -m
|
||||
flag causes this behavior even when processing single files.
|
||||
.PP
|
||||
The normal output contains lines of these forms:
|
||||
.IP "" 5
|
||||
.I n1
|
||||
.B a
|
||||
.I n3,n4
|
||||
.br
|
||||
.I n1,n2
|
||||
.B d
|
||||
.I n3
|
||||
.br
|
||||
.I n1,n2
|
||||
.B c
|
||||
.I n3,n4
|
||||
.PP
|
||||
These lines resemble
|
||||
.I ed
|
||||
commands to convert
|
||||
.I file1
|
||||
into
|
||||
.IR file2 .
|
||||
The numbers after the letters pertain to
|
||||
.IR file2 .
|
||||
In fact, by exchanging `a' for `d' and reading backward
|
||||
one may ascertain equally how to convert
|
||||
.I file2
|
||||
into
|
||||
.IR file1 .
|
||||
As in
|
||||
.IR ed ,
|
||||
identical pairs where
|
||||
.I n1
|
||||
=
|
||||
.I n2
|
||||
or
|
||||
.I n3
|
||||
=
|
||||
.I n4
|
||||
are abbreviated as a single number.
|
||||
.PP
|
||||
Following each of these lines come all the lines that are
|
||||
affected in the first file flagged by `<',
|
||||
then all the lines that are affected in the second file
|
||||
flagged by `>'.
|
||||
.PP
|
||||
The
|
||||
.B -b
|
||||
option causes
|
||||
trailing blanks (spaces and tabs) to be ignored
|
||||
and other strings of blanks to compare equal.
|
||||
The
|
||||
.B -w
|
||||
option causes all white-space to be removed from input lines
|
||||
before applying the difference algorithm.
|
||||
.PP
|
||||
The
|
||||
.B -n
|
||||
option prefixes each range with
|
||||
.IB file : \fR
|
||||
and inserts a space around the
|
||||
.BR a ,
|
||||
.BR c ,
|
||||
and
|
||||
.B d
|
||||
verbs.
|
||||
The
|
||||
.B -e
|
||||
option produces a script of
|
||||
.I "a, c"
|
||||
and
|
||||
.I d
|
||||
commands for the editor
|
||||
.IR ed ,
|
||||
which will recreate
|
||||
.I file2
|
||||
from
|
||||
.IR file1 .
|
||||
The
|
||||
.B -f
|
||||
option produces a similar script,
|
||||
not useful with
|
||||
.IR ed ,
|
||||
in the opposite order. It may, however, be
|
||||
useful as input to a stream-oriented post-processor.
|
||||
.PP
|
||||
The
|
||||
.B -c
|
||||
option includes three lines of context around each
|
||||
change, merging changes whose contexts overlap.
|
||||
The
|
||||
.B -a
|
||||
flag displays the entire file as context.
|
||||
.PP
|
||||
Except in rare circumstances,
|
||||
.I diff
|
||||
finds a smallest sufficient set of file
|
||||
differences.
|
||||
.SH FILES
|
||||
.B /tmp/diff[12]
|
||||
.SH SOURCE
|
||||
.B \*9/src/cmd/diff
|
||||
.SH "SEE ALSO"
|
||||
.IR cmp (1),
|
||||
.IR comm (1),
|
||||
.IR ed (1)
|
||||
.SH DIAGNOSTICS
|
||||
Exit status is the empty string
|
||||
for no differences,
|
||||
.L some
|
||||
for some,
|
||||
and
|
||||
.L error
|
||||
for trouble.
|
||||
.SH BUGS
|
||||
Editing scripts produced under the
|
||||
.BR -e " or"
|
||||
.BR -f " option are naive about"
|
||||
creating lines consisting of a single `\fB.\fR'.
|
||||
.PP
|
||||
When running
|
||||
.I diff
|
||||
on directories, the notion of what is a text
|
||||
file is open to debate.
|
27
diff/diff.h
Normal file
27
diff/diff.h
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
 * Shared declarations for the diff sources.
 * From here on the name stdout is rewritten to bstdout, so the Biobuf
 * declared below and defined in main.c is the object every Bprint(&stdout, ...)
 * call writes to (presumably to avoid clashing with the C library's stdout).
 */
#define stdout bstdout
|
||||
|
||||
/*
 * NOTE(review): mode, bflag, rflag, mflag and anychange are defined (not
 * declared extern) in this header, so every file including it carries a
 * tentative definition; toolchains defaulting to -fno-common may reject
 * that at link time -- confirm against the build flags in config.mk.
 */
char mode; /* '\0', 'e', 'f', 'n', 'c', 'a' (set from the option letter in main) */
|
||||
char bflag; /* ignore multiple and trailing blanks (1 for -b, 2 for -w) */
|
||||
char rflag; /* recurse down directory trees */
|
||||
char mflag; /* pseudo flag: doing multiple files, one dir */
|
||||
/* set to 1 by change() as soon as one difference is reported */
int anychange;
|
||||
extern Biobuf stdout;
|
||||
/* nonzero once prepare() has classified one of the inputs as binary */
extern int binary;
|
||||
|
||||
/* typed wrappers around emalloc/erealloc/free; n is an element count */
#define MALLOC(t, n) ((t *)emalloc((n)*sizeof(t)))
|
||||
#define REALLOC(p, t, n) ((t *)erealloc((void *)(p), (n)*sizeof(t)))
|
||||
#define FREE(p) free((void *)(p))
|
||||
|
||||
/* path buffers in diff() and diffdir() are declared with MAXPATHLEN+1 bytes */
#define MAXPATHLEN 1024
|
||||
|
||||
/* returns 0 on success, 1 (after reporting) when the joined path is too long */
int mkpathname(char *, char *, char *);
|
||||
void *emalloc(unsigned);
|
||||
void *erealloc(void *, unsigned);
|
||||
void diff(char *, char *, int);
|
||||
void diffdir(char *, char *, int);
|
||||
void diffreg(char *, char *);
|
||||
Biobuf *prepare(int, char *);
|
||||
/* prints a message on fd 2; exits through done() only when the status is nonzero */
void panic(int, char *, ...);
|
||||
void check(Biobuf *, Biobuf *);
|
||||
void change(int, int, int, int);
|
||||
void flushchanges(void);
|
113
diff/diffdir.c
Normal file
113
diff/diffdir.c
Normal file
@@ -0,0 +1,113 @@
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include "diff.h"
|
||||
|
||||
/*
 * qsort comparator for an array of char*: each argument points at one
 * array slot, and the slots are ordered by strcmp of the strings.
 */
static int
itemcmp(const void *v1, const void *v2)
{
	const char *lhs = *(char *const *)v1;
	const char *rhs = *(char *const *)v2;

	return strcmp(lhs, rhs);
}
|
||||
|
||||
static char **
|
||||
scandir(char *name)
|
||||
{
|
||||
char **cp;
|
||||
Dir *db;
|
||||
int nitems;
|
||||
int fd, n;
|
||||
|
||||
if ((fd = open(name, OREAD)) < 0){
|
||||
panic(mflag ? 0 : 2, "can't open %s\n", name);
|
||||
return nil;
|
||||
}
|
||||
cp = 0;
|
||||
nitems = 0;
|
||||
if((n = dirreadall(fd, &db)) > 0){
|
||||
while (n--) {
|
||||
cp = REALLOC(cp, char *, (nitems+1));
|
||||
cp[nitems] = MALLOC(char, strlen((db+n)->name)+1);
|
||||
strcpy(cp[nitems], (db+n)->name);
|
||||
nitems++;
|
||||
}
|
||||
free(db);
|
||||
}
|
||||
cp = REALLOC(cp, char*, (nitems+1));
|
||||
cp[nitems] = 0;
|
||||
close(fd);
|
||||
qsort((char *)cp, nitems, sizeof(char*), itemcmp);
|
||||
return cp;
|
||||
}
|
||||
|
||||
/* Return 1 when p is exactly "." or "..", 0 for any other string. */
static int
isdotordotdot(char *p)
{
	if (p[0] != '.')
		return 0;
	if (p[1] == '\0')
		return 1;
	return p[1] == '.' && p[2] == '\0';
}
|
||||
|
||||
void
|
||||
diffdir(char *f, char *t, int level)
|
||||
{
|
||||
char **df, **dt, **dirf, **dirt;
|
||||
char *from, *to;
|
||||
int res;
|
||||
char fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
|
||||
|
||||
df = scandir(f);
|
||||
dt = scandir(t);
|
||||
dirf = df;
|
||||
dirt = dt;
|
||||
if(df == nil || dt == nil)
|
||||
goto Out;
|
||||
while (*df || *dt) {
|
||||
from = *df;
|
||||
to = *dt;
|
||||
if (from && isdotordotdot(from)) {
|
||||
df++;
|
||||
continue;
|
||||
}
|
||||
if (to && isdotordotdot(to)) {
|
||||
dt++;
|
||||
continue;
|
||||
}
|
||||
if (!from)
|
||||
res = 1;
|
||||
else if (!to)
|
||||
res = -1;
|
||||
else
|
||||
res = strcmp(from, to);
|
||||
if (res < 0) {
|
||||
if (mode == 0 || mode == 'n')
|
||||
Bprint(&stdout, "Only in %s: %s\n", f, from);
|
||||
df++;
|
||||
continue;
|
||||
}
|
||||
if (res > 0) {
|
||||
if (mode == 0 || mode == 'n')
|
||||
Bprint(&stdout, "Only in %s: %s\n", t, to);
|
||||
dt++;
|
||||
continue;
|
||||
}
|
||||
if (mkpathname(fb, f, from))
|
||||
continue;
|
||||
if (mkpathname(tb, t, to))
|
||||
continue;
|
||||
diff(fb, tb, level+1);
|
||||
df++; dt++;
|
||||
}
|
||||
Out:
|
||||
for (df = dirf; df && *df; df++)
|
||||
FREE(*df);
|
||||
for (dt = dirt; dt && *dt; dt++)
|
||||
FREE(*dt);
|
||||
FREE(dirf);
|
||||
FREE(dirt);
|
||||
}
|
387
diff/diffio.c
Normal file
387
diff/diffio.c
Normal file
@@ -0,0 +1,387 @@
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include <ctype.h>
|
||||
#include "diff.h"
|
||||
|
||||
struct line {
|
||||
int serial;
|
||||
int value;
|
||||
};
|
||||
extern struct line *file[2];
|
||||
extern int len[2];
|
||||
extern long *ixold, *ixnew;
|
||||
extern int *J;
|
||||
|
||||
static Biobuf *input[2];
|
||||
static char *file1, *file2;
|
||||
static int firstchange;
|
||||
|
||||
#define MAXLINELEN 4096
|
||||
#define MIN(x, y) ((x) < (y) ? (x): (y))
|
||||
|
||||
static int
|
||||
readline(Biobuf *bp, char *buf)
|
||||
{
|
||||
int c;
|
||||
char *p, *e;
|
||||
|
||||
p = buf;
|
||||
e = p + MAXLINELEN-1;
|
||||
do {
|
||||
c = Bgetc(bp);
|
||||
if (c < 0) {
|
||||
if (p == buf)
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
if (c == '\n')
|
||||
break;
|
||||
*p++ = c;
|
||||
} while (p < e);
|
||||
*p = 0;
|
||||
if (c != '\n' && c >= 0) {
|
||||
do c = Bgetc(bp);
|
||||
while (c >= 0 && c != '\n');
|
||||
}
|
||||
return p - buf;
|
||||
}
|
||||
|
||||
#define HALFLONG 16
|
||||
#define low(x) (x&((1L<<HALFLONG)-1))
|
||||
#define high(x) (x>>HALFLONG)
|
||||
|
||||
/*
|
||||
* hashing has the effect of
|
||||
* arranging line in 7-bit bytes and then
|
||||
* summing 1-s complement in 16-bit hunks
|
||||
*/
|
||||
static int
|
||||
readhash(Biobuf *bp, char *buf)
|
||||
{
|
||||
long sum;
|
||||
unsigned shift;
|
||||
char *p;
|
||||
int len, space;
|
||||
|
||||
sum = 1;
|
||||
shift = 0;
|
||||
if ((len = readline(bp, buf)) == -1)
|
||||
return 0;
|
||||
p = buf;
|
||||
switch(bflag) /* various types of white space handling */
|
||||
{
|
||||
case 0:
|
||||
while (len--) {
|
||||
sum += (long)*p++ << (shift &= (HALFLONG-1));
|
||||
shift += 7;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
/*
|
||||
* coalesce multiple white-space
|
||||
*/
|
||||
for (space = 0; len--; p++) {
|
||||
if (isspace((uchar)*p)) {
|
||||
space++;
|
||||
continue;
|
||||
}
|
||||
if (space) {
|
||||
shift += 7;
|
||||
space = 0;
|
||||
}
|
||||
sum += (long)*p << (shift &= (HALFLONG-1));
|
||||
shift += 7;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
/*
|
||||
* strip all white-space
|
||||
*/
|
||||
while (len--) {
|
||||
if (isspace((uchar)*p)) {
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
sum += (long)*p++ << (shift &= (HALFLONG-1));
|
||||
shift += 7;
|
||||
}
|
||||
break;
|
||||
}
|
||||
sum = low(sum) + high(sum);
|
||||
return ((short)low(sum) + (short)high(sum));
|
||||
}
|
||||
|
||||
Biobuf *
|
||||
prepare(int i, char *arg)
|
||||
{
|
||||
struct line *p;
|
||||
int j, h;
|
||||
Biobuf *bp;
|
||||
char *cp, buf[MAXLINELEN];
|
||||
int nbytes;
|
||||
Rune r;
|
||||
|
||||
bp = Bopen(arg, OREAD);
|
||||
if (!bp) {
|
||||
panic(mflag ? 0: 2, "cannot open %s: %r\n", arg);
|
||||
return 0;
|
||||
}
|
||||
if (binary)
|
||||
return bp;
|
||||
nbytes = Bread(bp, buf, MIN(1024, MAXLINELEN));
|
||||
if (nbytes > 0) {
|
||||
cp = buf;
|
||||
while (cp < buf+nbytes-UTFmax) {
|
||||
/*
|
||||
* heuristic for a binary file in the
|
||||
* brave new UNICODE world
|
||||
*/
|
||||
cp += chartorune(&r, cp);
|
||||
if (r == 0 || (r > 0x7f && r <= 0xa0)) {
|
||||
binary++;
|
||||
return bp;
|
||||
}
|
||||
}
|
||||
Bseek(bp, 0, 0);
|
||||
}
|
||||
p = MALLOC(struct line, 3);
|
||||
for (j = 0; h = readhash(bp, buf); p[j].value = h)
|
||||
p = REALLOC(p, struct line, (++j+3));
|
||||
len[i] = j;
|
||||
file[i] = p;
|
||||
input[i] = bp; /*fix*/
|
||||
if (i == 0) { /*fix*/
|
||||
file1 = arg;
|
||||
firstchange = 0;
|
||||
}
|
||||
else
|
||||
file2 = arg;
|
||||
return bp;
|
||||
}
|
||||
|
||||
static int
|
||||
squishspace(char *buf)
|
||||
{
|
||||
char *p, *q;
|
||||
int space;
|
||||
|
||||
for (space = 0, q = p = buf; *q; q++) {
|
||||
if (isspace((uchar)*q)) {
|
||||
space++;
|
||||
continue;
|
||||
}
|
||||
if (space && bflag == 1) {
|
||||
*p++ = ' ';
|
||||
space = 0;
|
||||
}
|
||||
*p++ = *q;
|
||||
}
|
||||
*p = 0;
|
||||
return p - buf;
|
||||
}
|
||||
|
||||
/*
|
||||
* need to fix up for unexpected EOF's
|
||||
*/
|
||||
void
|
||||
check(Biobuf *bf, Biobuf *bt)
|
||||
{
|
||||
int f, t, flen, tlen;
|
||||
char fbuf[MAXLINELEN], tbuf[MAXLINELEN];
|
||||
|
||||
ixold[0] = ixnew[0] = 0;
|
||||
for (f = t = 1; f < len[0]; f++) {
|
||||
flen = readline(bf, fbuf);
|
||||
ixold[f] = ixold[f-1] + flen + 1; /* ftell(bf) */
|
||||
if (J[f] == 0)
|
||||
continue;
|
||||
do {
|
||||
tlen = readline(bt, tbuf);
|
||||
ixnew[t] = ixnew[t-1] + tlen + 1; /* ftell(bt) */
|
||||
} while (t++ < J[f]);
|
||||
if (bflag) {
|
||||
flen = squishspace(fbuf);
|
||||
tlen = squishspace(tbuf);
|
||||
}
|
||||
if (flen != tlen || strcmp(fbuf, tbuf))
|
||||
J[f] = 0;
|
||||
}
|
||||
while (t < len[1]) {
|
||||
tlen = readline(bt, tbuf);
|
||||
ixnew[t] = ixnew[t-1] + tlen + 1; /* fseek(bt) */
|
||||
t++;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
range(int a, int b, char *separator)
|
||||
{
|
||||
Bprint(&stdout, "%d", a > b ? b: a);
|
||||
if (a < b)
|
||||
Bprint(&stdout, "%s%d", separator, b);
|
||||
}
|
||||
|
||||
static void
|
||||
fetch(long *f, int a, int b, Biobuf *bp, char *s)
|
||||
{
|
||||
char buf[MAXLINELEN];
|
||||
int maxb;
|
||||
|
||||
if(a <= 1)
|
||||
a = 1;
|
||||
if(bp == input[0])
|
||||
maxb = len[0];
|
||||
else
|
||||
maxb = len[1];
|
||||
if(b > maxb)
|
||||
b = maxb;
|
||||
if(a > maxb)
|
||||
return;
|
||||
Bseek(bp, f[a-1], 0);
|
||||
while (a++ <= b) {
|
||||
readline(bp, buf);
|
||||
Bprint(&stdout, "%s%s\n", s, buf);
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct Change Change;
|
||||
struct Change
|
||||
{
|
||||
int a;
|
||||
int b;
|
||||
int c;
|
||||
int d;
|
||||
};
|
||||
|
||||
Change *changes;
|
||||
int nchanges;
|
||||
|
||||
void
|
||||
change(int a, int b, int c, int d)
|
||||
{
|
||||
char verb;
|
||||
char buf[4];
|
||||
Change *ch;
|
||||
|
||||
if (a > b && c > d)
|
||||
return;
|
||||
anychange = 1;
|
||||
if (mflag && firstchange == 0) {
|
||||
if(mode) {
|
||||
buf[0] = '-';
|
||||
buf[1] = mode;
|
||||
buf[2] = ' ';
|
||||
buf[3] = '\0';
|
||||
} else {
|
||||
buf[0] = '\0';
|
||||
}
|
||||
Bprint(&stdout, "diff %s%s %s\n", buf, file1, file2);
|
||||
firstchange = 1;
|
||||
}
|
||||
verb = a > b ? 'a': c > d ? 'd': 'c';
|
||||
switch(mode) {
|
||||
case 'e':
|
||||
range(a, b, ",");
|
||||
Bputc(&stdout, verb);
|
||||
break;
|
||||
case 0:
|
||||
range(a, b, ",");
|
||||
Bputc(&stdout, verb);
|
||||
range(c, d, ",");
|
||||
break;
|
||||
case 'n':
|
||||
Bprint(&stdout, "%s:", file1);
|
||||
range(a, b, ",");
|
||||
Bprint(&stdout, " %c ", verb);
|
||||
Bprint(&stdout, "%s:", file2);
|
||||
range(c, d, ",");
|
||||
break;
|
||||
case 'f':
|
||||
Bputc(&stdout, verb);
|
||||
range(a, b, " ");
|
||||
break;
|
||||
case 'c':
|
||||
case 'a':
|
||||
if(nchanges%1024 == 0)
|
||||
changes = erealloc(changes, (nchanges+1024)*sizeof(changes[0]));
|
||||
ch = &changes[nchanges++];
|
||||
ch->a = a;
|
||||
ch->b = b;
|
||||
ch->c = c;
|
||||
ch->d = d;
|
||||
return;
|
||||
}
|
||||
Bputc(&stdout, '\n');
|
||||
if (mode == 0 || mode == 'n') {
|
||||
fetch(ixold, a, b, input[0], "< ");
|
||||
if (a <= b && c <= d)
|
||||
Bprint(&stdout, "---\n");
|
||||
}
|
||||
fetch(ixnew, c, d, input[1], mode == 0 || mode == 'n' ? "> ": "");
|
||||
if (mode != 0 && mode != 'n' && c <= d)
|
||||
Bprint(&stdout, ".\n");
|
||||
}
|
||||
|
||||
enum
|
||||
{
|
||||
Lines = 3 /* number of lines of context shown */
|
||||
};
|
||||
|
||||
int
|
||||
changeset(int i)
|
||||
{
|
||||
while(i<nchanges && changes[i].b+1+2*Lines > changes[i+1].a)
|
||||
i++;
|
||||
if(i<nchanges)
|
||||
return i+1;
|
||||
return nchanges;
|
||||
}
|
||||
|
||||
void
|
||||
flushchanges(void)
|
||||
{
|
||||
int a, b, c, d, at;
|
||||
int i, j;
|
||||
|
||||
if(nchanges == 0)
|
||||
return;
|
||||
|
||||
for(i=0; i<nchanges; ){
|
||||
j = changeset(i);
|
||||
a = changes[i].a-Lines;
|
||||
b = changes[j-1].b+Lines;
|
||||
c = changes[i].c-Lines;
|
||||
d = changes[j-1].d+Lines;
|
||||
if(a < 1)
|
||||
a = 1;
|
||||
if(c < 1)
|
||||
c = 1;
|
||||
if(b > len[0])
|
||||
b = len[0];
|
||||
if(d > len[1])
|
||||
d = len[1];
|
||||
if(mode == 'a'){
|
||||
a = 1;
|
||||
b = len[0];
|
||||
c = 1;
|
||||
d = len[1];
|
||||
j = nchanges;
|
||||
}
|
||||
Bprint(&stdout, "%s:", file1);
|
||||
range(a, b, ",");
|
||||
Bprint(&stdout, " - ");
|
||||
Bprint(&stdout, "%s:", file2);
|
||||
range(c, d, ",");
|
||||
Bputc(&stdout, '\n');
|
||||
at = a;
|
||||
for(; i<j; i++){
|
||||
fetch(ixold, at, changes[i].a-1, input[0], " ");
|
||||
fetch(ixold, changes[i].a, changes[i].b, input[0], "- ");
|
||||
fetch(ixnew, changes[i].c, changes[i].d, input[1], "+ ");
|
||||
at = changes[i].b+1;
|
||||
}
|
||||
fetch(ixold, at, b, input[0], " ");
|
||||
}
|
||||
nchanges = 0;
|
||||
}
|
420
diff/diffreg.c
Normal file
420
diff/diffreg.c
Normal file
@@ -0,0 +1,420 @@
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include "diff.h"
|
||||
|
||||
/* diff - differential file comparison
|
||||
*
|
||||
* Uses an algorithm due to Harold Stone, which finds
|
||||
* a pair of longest identical subsequences in the two
|
||||
* files.
|
||||
*
|
||||
* The major goal is to generate the match vector J.
|
||||
* J[i] is the index of the line in file1 corresponding
|
||||
* to line i file0. J[i] = 0 if there is no
|
||||
* such line in file1.
|
||||
*
|
||||
* Lines are hashed so as to work in core. All potential
|
||||
* matches are located by sorting the lines of each file
|
||||
* on the hash (called value). In particular, this
|
||||
* collects the equivalence classes in file1 together.
|
||||
* Subroutine equiv replaces the value of each line in
|
||||
* file0 by the index of the first element of its
|
||||
* matching equivalence in (the reordered) file1.
|
||||
* To save space equiv squeezes file1 into a single
|
||||
* array member in which the equivalence classes
|
||||
* are simply concatenated, except that their first
|
||||
* members are flagged by changing sign.
|
||||
*
|
||||
* Next the indices that point into member are unsorted into
|
||||
* array class according to the original order of file0.
|
||||
*
|
||||
* The cleverness lies in routine stone. This marches
|
||||
* through the lines of file0, developing a vector klist
|
||||
* of "k-candidates". At step i a k-candidate is a matched
|
||||
* pair of lines x,y (x in file0 y in file1) such that
|
||||
* there is a common subsequence of length k
|
||||
* between the first i lines of file0 and the first y
|
||||
* lines of file1, but there is no such subsequence for
|
||||
* any smaller y. x is the earliest possible mate to y
|
||||
* that occurs in such a subsequence.
|
||||
*
|
||||
* Whenever any of the members of the equivalence class of
|
||||
* lines in file1 matable to a line in file0 has serial number
|
||||
* less than the y of some k-candidate, that k-candidate
|
||||
* with the smallest such y is replaced. The new
|
||||
* k-candidate is chained (via pred) to the current
|
||||
* k-1 candidate so that the actual subsequence can
|
||||
* be recovered. When a member has serial number greater
|
||||
* than the y of all k-candidates, the klist is extended.
|
||||
* At the end, the longest subsequence is pulled out
|
||||
* and placed in the array J by unravel.
|
||||
*
|
||||
* With J in hand, the matches there recorded are
|
||||
* check'ed against reality to assure that no spurious
|
||||
* matches have crept in due to hashing. If they have,
|
||||
* they are broken, and "jackpot " is recorded--a harmless
|
||||
* matter except that a true match for a spuriously
|
||||
* mated line may now be unnecessarily reported as a change.
|
||||
*
|
||||
* Much of the complexity of the program comes simply
|
||||
* from trying to minimize core utilization and
|
||||
* maximize the range of doable problems by dynamically
|
||||
* allocating what is needed and reusing what is not.
|
||||
* The core requirements for problems larger than somewhat
|
||||
* are (in words) 2*length(file0) + length(file1) +
|
||||
* 3*(number of k-candidates installed), typically about
|
||||
* 6n words for files of length n.
|
||||
*/
|
||||
/* TIDY THIS UP */
|
||||
struct cand {
|
||||
int x;
|
||||
int y;
|
||||
int pred;
|
||||
} cand;
|
||||
struct line {
|
||||
int serial;
|
||||
int value;
|
||||
} *file[2], line;
|
||||
int len[2];
|
||||
int binary;
|
||||
struct line *sfile[2]; /*shortened by pruning common prefix and suffix*/
|
||||
int slen[2];
|
||||
int pref, suff; /*length of prefix and suffix*/
|
||||
int *class; /*will be overlaid on file[0]*/
|
||||
int *member; /*will be overlaid on file[1]*/
|
||||
int *klist; /*will be overlaid on file[0] after class*/
|
||||
struct cand *clist; /* merely a free storage pot for candidates */
|
||||
int clen;
|
||||
int *J; /*will be overlaid on class*/
|
||||
long *ixold; /*will be overlaid on klist*/
|
||||
long *ixnew; /*will be overlaid on file[1]*/
|
||||
/* END OF SOME TIDYING */
|
||||
|
||||
static void
|
||||
sort(struct line *a, int n) /*shellsort CACM #201*/
|
||||
{
|
||||
int m;
|
||||
struct line *ai, *aim, *j, *k;
|
||||
struct line w;
|
||||
int i;
|
||||
|
||||
m = 0;
|
||||
for (i = 1; i <= n; i *= 2)
|
||||
m = 2*i - 1;
|
||||
for (m /= 2; m != 0; m /= 2) {
|
||||
k = a+(n-m);
|
||||
for (j = a+1; j <= k; j++) {
|
||||
ai = j;
|
||||
aim = ai+m;
|
||||
do {
|
||||
if (aim->value > ai->value ||
|
||||
aim->value == ai->value &&
|
||||
aim->serial > ai->serial)
|
||||
break;
|
||||
w = *ai;
|
||||
*ai = *aim;
|
||||
*aim = w;
|
||||
|
||||
aim = ai;
|
||||
ai -= m;
|
||||
} while (ai > a && aim >= ai);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
unsort(struct line *f, int l, int *b)
|
||||
{
|
||||
int *a;
|
||||
int i;
|
||||
|
||||
a = MALLOC(int, (l+1));
|
||||
for(i=1;i<=l;i++)
|
||||
a[f[i].serial] = f[i].value;
|
||||
for(i=1;i<=l;i++)
|
||||
b[i] = a[i];
|
||||
FREE(a);
|
||||
}
|
||||
|
||||
static void
|
||||
prune(void)
|
||||
{
|
||||
int i,j;
|
||||
|
||||
for(pref=0;pref<len[0]&&pref<len[1]&&
|
||||
file[0][pref+1].value==file[1][pref+1].value;
|
||||
pref++ ) ;
|
||||
for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&&
|
||||
file[0][len[0]-suff].value==file[1][len[1]-suff].value;
|
||||
suff++) ;
|
||||
for(j=0;j<2;j++) {
|
||||
sfile[j] = file[j]+pref;
|
||||
slen[j] = len[j]-pref-suff;
|
||||
for(i=0;i<=slen[j];i++)
|
||||
sfile[j][i].serial = i;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
equiv(struct line *a, int n, struct line *b, int m, int *c)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
i = j = 1;
|
||||
while(i<=n && j<=m) {
|
||||
if(a[i].value < b[j].value)
|
||||
a[i++].value = 0;
|
||||
else if(a[i].value == b[j].value)
|
||||
a[i++].value = j;
|
||||
else
|
||||
j++;
|
||||
}
|
||||
while(i <= n)
|
||||
a[i++].value = 0;
|
||||
b[m+1].value = 0;
|
||||
j = 0;
|
||||
while(++j <= m) {
|
||||
c[j] = -b[j].serial;
|
||||
while(b[j+1].value == b[j].value) {
|
||||
j++;
|
||||
c[j] = b[j].serial;
|
||||
}
|
||||
}
|
||||
c[j] = -1;
|
||||
}
|
||||
|
||||
static int
|
||||
newcand(int x, int y, int pred)
|
||||
{
|
||||
struct cand *q;
|
||||
|
||||
clist = REALLOC(clist, struct cand, (clen+1));
|
||||
q = clist + clen;
|
||||
q->x = x;
|
||||
q->y = y;
|
||||
q->pred = pred;
|
||||
return clen++;
|
||||
}
|
||||
|
||||
static int
|
||||
search(int *c, int k, int y)
|
||||
{
|
||||
int i, j, l;
|
||||
int t;
|
||||
|
||||
if(clist[c[k]].y < y) /*quick look for typical case*/
|
||||
return k+1;
|
||||
i = 0;
|
||||
j = k+1;
|
||||
while((l=(i+j)/2) > i) {
|
||||
t = clist[c[l]].y;
|
||||
if(t > y)
|
||||
j = l;
|
||||
else if(t < y)
|
||||
i = l;
|
||||
else
|
||||
return l;
|
||||
}
|
||||
return l+1;
|
||||
}
|
||||
|
||||
static int
|
||||
stone(int *a, int n, int *b, int *c)
|
||||
{
|
||||
int i, k,y;
|
||||
int j, l;
|
||||
int oldc, tc;
|
||||
int oldl;
|
||||
|
||||
k = 0;
|
||||
c[0] = newcand(0,0,0);
|
||||
for(i=1; i<=n; i++) {
|
||||
j = a[i];
|
||||
if(j==0)
|
||||
continue;
|
||||
y = -b[j];
|
||||
oldl = 0;
|
||||
oldc = c[0];
|
||||
do {
|
||||
if(y <= clist[oldc].y)
|
||||
continue;
|
||||
l = search(c, k, y);
|
||||
if(l!=oldl+1)
|
||||
oldc = c[l-1];
|
||||
if(l<=k) {
|
||||
if(clist[c[l]].y <= y)
|
||||
continue;
|
||||
tc = c[l];
|
||||
c[l] = newcand(i,y,oldc);
|
||||
oldc = tc;
|
||||
oldl = l;
|
||||
} else {
|
||||
c[l] = newcand(i,y,oldc);
|
||||
k++;
|
||||
break;
|
||||
}
|
||||
} while((y=b[++j]) > 0);
|
||||
}
|
||||
return k;
|
||||
}
|
||||
|
||||
static void
|
||||
unravel(int p)
|
||||
{
|
||||
int i;
|
||||
struct cand *q;
|
||||
|
||||
for(i=0; i<=len[0]; i++) {
|
||||
if (i <= pref)
|
||||
J[i] = i;
|
||||
else if (i > len[0]-suff)
|
||||
J[i] = i+len[1]-len[0];
|
||||
else
|
||||
J[i] = 0;
|
||||
}
|
||||
for(q=clist+p;q->y!=0;q=clist+q->pred)
|
||||
J[q->x+pref] = q->y+pref;
|
||||
}
|
||||
|
||||
static void
|
||||
output(void)
|
||||
{
|
||||
int m, i0, i1, j0, j1;
|
||||
|
||||
m = len[0];
|
||||
J[0] = 0;
|
||||
J[m+1] = len[1]+1;
|
||||
if (mode != 'e') {
|
||||
for (i0 = 1; i0 <= m; i0 = i1+1) {
|
||||
while (i0 <= m && J[i0] == J[i0-1]+1)
|
||||
i0++;
|
||||
j0 = J[i0-1]+1;
|
||||
i1 = i0-1;
|
||||
while (i1 < m && J[i1+1] == 0)
|
||||
i1++;
|
||||
j1 = J[i1+1]-1;
|
||||
J[i1] = j1;
|
||||
change(i0, i1, j0, j1);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (i0 = m; i0 >= 1; i0 = i1-1) {
|
||||
while (i0 >= 1 && J[i0] == J[i0+1]-1 && J[i0])
|
||||
i0--;
|
||||
j0 = J[i0+1]-1;
|
||||
i1 = i0+1;
|
||||
while (i1 > 1 && J[i1-1] == 0)
|
||||
i1--;
|
||||
j1 = J[i1-1]+1;
|
||||
J[i1] = j1;
|
||||
change(i1 , i0, j1, j0);
|
||||
}
|
||||
}
|
||||
if (m == 0)
|
||||
change(1, 0, 1, len[1]);
|
||||
flushchanges();
|
||||
}
|
||||
|
||||
#define BUF 4096
|
||||
static int
|
||||
cmp(Biobuf* b1, Biobuf* b2)
|
||||
{
|
||||
int n;
|
||||
uchar buf1[BUF], buf2[BUF];
|
||||
int f1, f2;
|
||||
vlong nc = 1;
|
||||
uchar *b1s, *b1e, *b2s, *b2e;
|
||||
|
||||
f1 = Bfildes(b1);
|
||||
f2 = Bfildes(b2);
|
||||
seek(f1, 0, 0);
|
||||
seek(f2, 0, 0);
|
||||
b1s = b1e = buf1;
|
||||
b2s = b2e = buf2;
|
||||
for(;;){
|
||||
if(b1s >= b1e){
|
||||
if(b1s >= &buf1[BUF])
|
||||
b1s = buf1;
|
||||
n = read(f1, b1s, &buf1[BUF] - b1s);
|
||||
b1e = b1s + n;
|
||||
}
|
||||
if(b2s >= b2e){
|
||||
if(b2s >= &buf2[BUF])
|
||||
b2s = buf2;
|
||||
n = read(f2, b2s, &buf2[BUF] - b2s);
|
||||
b2e = b2s + n;
|
||||
}
|
||||
n = b2e - b2s;
|
||||
if(n > b1e - b1s)
|
||||
n = b1e - b1s;
|
||||
if(n <= 0)
|
||||
break;
|
||||
if(memcmp((void *)b1s, (void *)b2s, n) != 0){
|
||||
return 1;
|
||||
}
|
||||
nc += n;
|
||||
b1s += n;
|
||||
b2s += n;
|
||||
}
|
||||
if(b1e - b1s == b2e - b2s)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
void
|
||||
diffreg(char *f, char *t)
|
||||
{
|
||||
Biobuf *b0, *b1;
|
||||
int k;
|
||||
|
||||
binary = 0;
|
||||
b0 = prepare(0, f);
|
||||
if (!b0)
|
||||
return;
|
||||
b1 = prepare(1, t);
|
||||
if (!b1) {
|
||||
FREE(file[0]);
|
||||
Bterm(b0);
|
||||
return;
|
||||
}
|
||||
if (binary){
|
||||
/* could use b0 and b1 but this is simpler. */
|
||||
if (cmp(b0, b1))
|
||||
print("binary files %s %s differ\n", f, t);
|
||||
Bterm(b0);
|
||||
Bterm(b1);
|
||||
return;
|
||||
}
|
||||
clen = 0;
|
||||
prune();
|
||||
sort(sfile[0], slen[0]);
|
||||
sort(sfile[1], slen[1]);
|
||||
|
||||
member = (int *)file[1];
|
||||
equiv(sfile[0], slen[0], sfile[1], slen[1], member);
|
||||
member = REALLOC(member, int, slen[1]+2);
|
||||
|
||||
class = (int *)file[0];
|
||||
unsort(sfile[0], slen[0], class);
|
||||
class = REALLOC(class, int, slen[0]+2);
|
||||
|
||||
klist = MALLOC(int, slen[0]+2);
|
||||
clist = MALLOC(struct cand, 1);
|
||||
k = stone(class, slen[0], member, klist);
|
||||
FREE(member);
|
||||
FREE(class);
|
||||
|
||||
J = MALLOC(int, len[0]+2);
|
||||
unravel(klist[k]);
|
||||
FREE(clist);
|
||||
FREE(klist);
|
||||
|
||||
ixold = MALLOC(long, len[0]+2);
|
||||
ixnew = MALLOC(long, len[1]+2);
|
||||
Bseek(b0, 0, 0); Bseek(b1, 0, 0);
|
||||
check(b0, b1);
|
||||
output();
|
||||
FREE(J); FREE(ixold); FREE(ixnew);
|
||||
Bterm(b0); Bterm(b1); /* ++++ */
|
||||
}
|
270
diff/main.c
Normal file
270
diff/main.c
Normal file
@@ -0,0 +1,270 @@
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include "diff.h"
|
||||
|
||||
#define DIRECTORY(s) ((s)->qid.type&QTDIR)
|
||||
#define REGULAR_FILE(s) ((s)->type == 'M' && !DIRECTORY(s))
|
||||
|
||||
Biobuf stdout;
|
||||
|
||||
static char *tmp[] = {"/tmp/diff1XXXXXXXXXXX", "/tmp/diff2XXXXXXXXXXX"};
|
||||
static int whichtmp;
|
||||
static char *progname;
|
||||
static char usage[] = "diff [ -acefmnbwr ] file1 ... file2\n";
|
||||
|
||||
static void
|
||||
rmtmpfiles(void)
|
||||
{
|
||||
while (whichtmp > 0) {
|
||||
whichtmp--;
|
||||
remove(tmp[whichtmp]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Remove the temporary files and exit.  Status 0 exits with the empty
 * string, 1 with "some", anything else with "error".
 */
void
done(int status)
{
	rmtmpfiles();
	if (status == 0)
		exits("");
	if (status == 1)
		exits("some");
	exits("error");
	/*NOTREACHED*/
}
|
||||
|
||||
void
|
||||
panic(int status, char *fmt, ...)
|
||||
{
|
||||
va_list arg;
|
||||
|
||||
Bflush(&stdout);
|
||||
|
||||
fprint(2, "%s: ", progname);
|
||||
va_start(arg, fmt);
|
||||
vfprint(2, fmt, arg);
|
||||
va_end(arg);
|
||||
if (status)
|
||||
done(status);
|
||||
/*NOTREACHED*/
|
||||
}
|
||||
|
||||
/*
 * Note handler registered by mktmpfile(): report the note text and exit
 * with status 2 (done() removes the temporary files on the way out).
 */
static int
catch(void *a, char *msg)
{
	USED(a);
	/* msg is data, not a format: a '%' in it must be printed as is */
	panic(2, "%s", msg);
	return 1;
}
|
||||
|
||||
int
|
||||
mkpathname(char *pathname, char *path, char *name)
|
||||
{
|
||||
if (strlen(path) + strlen(name) > MAXPATHLEN) {
|
||||
panic(0, "pathname %s/%s too long\n", path, name);
|
||||
return 1;
|
||||
}
|
||||
sprint(pathname, "%s/%s", path, name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static char *
|
||||
mktmpfile(int input, Dir **sb)
|
||||
{
|
||||
int fd, i;
|
||||
char *p;
|
||||
char buf[8192];
|
||||
|
||||
atnotify(catch, 1);
|
||||
/*
|
||||
p = mktemp(tmp[whichtmp++]);
|
||||
fd = create(p, OWRITE, 0600);
|
||||
*/
|
||||
fd = mkstemp(p=tmp[whichtmp++]);
|
||||
if (fd < 0) {
|
||||
panic(mflag ? 0: 2, "cannot create %s: %r\n", p);
|
||||
return 0;
|
||||
}
|
||||
while ((i = read(input, buf, sizeof(buf))) > 0) {
|
||||
if ((i = write(fd, buf, i)) < 0)
|
||||
break;
|
||||
}
|
||||
*sb = dirfstat(fd);
|
||||
close(fd);
|
||||
if (i < 0) {
|
||||
panic(mflag ? 0: 2, "cannot read/write %s: %r\n", p);
|
||||
return 0;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
static char *
|
||||
statfile(char *file, Dir **sb)
|
||||
{
|
||||
Dir *dir;
|
||||
int input;
|
||||
|
||||
dir = dirstat(file);
|
||||
if(dir == nil) {
|
||||
if (strcmp(file, "-") || (dir = dirfstat(0)) == nil) {
|
||||
panic(mflag ? 0: 2, "cannot stat %s: %r\n", file);
|
||||
return 0;
|
||||
}
|
||||
free(dir);
|
||||
return mktmpfile(0, sb);
|
||||
}
|
||||
else if (!REGULAR_FILE(dir) && !DIRECTORY(dir)) {
|
||||
free(dir);
|
||||
if ((input = open(file, OREAD)) == -1) {
|
||||
panic(mflag ? 0: 2, "cannot open %s: %r\n", file);
|
||||
return 0;
|
||||
}
|
||||
file = mktmpfile(input, sb);
|
||||
close(input);
|
||||
}
|
||||
else
|
||||
*sb = dir;
|
||||
return file;
|
||||
}
|
||||
|
||||
void
|
||||
diff(char *f, char *t, int level)
|
||||
{
|
||||
char *fp, *tp, *p, fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
|
||||
Dir *fsb, *tsb;
|
||||
|
||||
if ((fp = statfile(f, &fsb)) == 0)
|
||||
goto Return;
|
||||
if ((tp = statfile(t, &tsb)) == 0){
|
||||
free(fsb);
|
||||
goto Return;
|
||||
}
|
||||
if (DIRECTORY(fsb) && DIRECTORY(tsb)) {
|
||||
if (rflag || level == 0)
|
||||
diffdir(fp, tp, level);
|
||||
else
|
||||
Bprint(&stdout, "Common subdirectories: %s and %s\n",
|
||||
fp, tp);
|
||||
}
|
||||
else if (REGULAR_FILE(fsb) && REGULAR_FILE(tsb))
|
||||
diffreg(fp, tp);
|
||||
else {
|
||||
if (REGULAR_FILE(fsb)) {
|
||||
if ((p = utfrrune(f, '/')) == 0)
|
||||
p = f;
|
||||
else
|
||||
p++;
|
||||
if (mkpathname(tb, tp, p) == 0)
|
||||
diffreg(fp, tb);
|
||||
}
|
||||
else {
|
||||
if ((p = utfrrune(t, '/')) == 0)
|
||||
p = t;
|
||||
else
|
||||
p++;
|
||||
if (mkpathname(fb, fp, p) == 0)
|
||||
diffreg(fb, tp);
|
||||
}
|
||||
}
|
||||
free(fsb);
|
||||
free(tsb);
|
||||
Return:
|
||||
rmtmpfiles();
|
||||
}
|
||||
|
||||
void
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
char *p;
|
||||
int i;
|
||||
Dir *fsb, *tsb;
|
||||
extern int _p9usepwlibrary;
|
||||
|
||||
_p9usepwlibrary = 0;
|
||||
Binit(&stdout, 1, OWRITE);
|
||||
progname = *argv;
|
||||
while (--argc && (*++argv)[0] == '-' && (*argv)[1]) {
|
||||
for (p = *argv+1; *p; p++) {
|
||||
switch (*p) {
|
||||
|
||||
case 'e':
|
||||
case 'f':
|
||||
case 'n':
|
||||
case 'c':
|
||||
case 'a':
|
||||
mode = *p;
|
||||
break;
|
||||
|
||||
case 'w':
|
||||
bflag = 2;
|
||||
break;
|
||||
|
||||
case 'b':
|
||||
bflag = 1;
|
||||
break;
|
||||
|
||||
case 'r':
|
||||
rflag = 1;
|
||||
mflag = 1;
|
||||
break;
|
||||
|
||||
case 'm':
|
||||
mflag = 1;
|
||||
break;
|
||||
|
||||
case 'h':
|
||||
default:
|
||||
progname = "Usage";
|
||||
panic(2, usage);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (argc < 2)
|
||||
panic(2, usage, progname);
|
||||
if ((tsb = dirstat(argv[argc-1])) == nil)
|
||||
panic(2, "can't stat %s\n", argv[argc-1]);
|
||||
if (argc > 2) {
|
||||
if (!DIRECTORY(tsb))
|
||||
panic(2, usage, progname);
|
||||
mflag = 1;
|
||||
}
|
||||
else {
|
||||
if ((fsb = dirstat(argv[0])) == nil)
|
||||
panic(2, "can't stat %s\n", argv[0]);
|
||||
if (DIRECTORY(fsb) && DIRECTORY(tsb))
|
||||
mflag = 1;
|
||||
free(fsb);
|
||||
}
|
||||
free(tsb);
|
||||
for (i = 0; i < argc-1; i++)
|
||||
diff(argv[i], argv[argc-1], 0);
|
||||
done(anychange);
|
||||
/*NOTREACHED*/
|
||||
}
|
||||
|
||||
static char noroom[] = "out of memory - try diff -h\n";
|
||||
|
||||
void *
|
||||
emalloc(unsigned n)
|
||||
{
|
||||
register void *p;
|
||||
|
||||
if ((p = malloc(n)) == 0)
|
||||
panic(2, noroom);
|
||||
return p;
|
||||
}
|
||||
|
||||
void *
|
||||
erealloc(void *p, unsigned n)
|
||||
{
|
||||
register void *rp;
|
||||
|
||||
if ((rp = realloc(p, n)) == 0)
|
||||
panic(2, noroom);
|
||||
return rp;
|
||||
}
|
10
join/Makefile
Normal file
10
join/Makefile
Normal file
@@ -0,0 +1,10 @@
|
||||
# join - join unix port from plan9
|
||||
# Depends on ../lib9
|
||||
|
||||
TARG = join
|
||||
|
||||
include ../std.mk
|
||||
|
||||
pre-uninstall:
|
||||
|
||||
post-install:
|
147
join/join.1
Normal file
147
join/join.1
Normal file
@@ -0,0 +1,147 @@
|
||||
.TH JOIN 1
|
||||
.CT 1 files
|
||||
.SH NAME
|
||||
join \- relational database operator
|
||||
.SH SYNOPSIS
|
||||
.B join
|
||||
[
|
||||
.I options
|
||||
]
|
||||
.I file1 file2
|
||||
.SH DESCRIPTION
|
||||
.I Join
|
||||
forms, on the standard output,
|
||||
a join
|
||||
of the two relations specified by the lines of
|
||||
.I file1
|
||||
and
|
||||
.IR file2 .
|
||||
If one of the file names is
|
||||
.LR - ,
|
||||
the standard input is used.
|
||||
.PP
|
||||
.I File1
|
||||
and
|
||||
.I file2
|
||||
must be sorted in increasing
|
||||
.SM ASCII
|
||||
collating
|
||||
sequence on the fields
|
||||
on which they are to be joined,
|
||||
normally the first in each line.
|
||||
.PP
|
||||
There is one line in the output
|
||||
for each pair of lines in
|
||||
.I file1
|
||||
and
|
||||
.I file2
|
||||
that have identical join fields.
|
||||
The output line normally consists of the common field,
|
||||
then the rest of the line from
|
||||
.IR file1 ,
|
||||
then the rest of the line from
|
||||
.IR file2 .
|
||||
.PP
|
||||
Input fields are normally separated by spaces or tabs;
|
||||
output fields by space.
|
||||
In this case, multiple separators count as one, and
|
||||
leading separators are discarded.
|
||||
.PP
|
||||
The following options are recognized, with POSIX syntax.
|
||||
.TP
|
||||
.BI -a " n
|
||||
In addition to the normal output,
|
||||
produce a line for each unpairable line in file
|
||||
.IR n ,
|
||||
where
|
||||
.I n
|
||||
is 1 or 2.
|
||||
.TP
|
||||
.BI -v " n
|
||||
Like
|
||||
.BR -a ,
|
||||
omitting output for paired lines.
|
||||
.TP
|
||||
.BI -e " s
|
||||
Replace empty output fields by string
|
||||
.IR s .
|
||||
.TP
|
||||
.BI -1 " m
|
||||
.br
|
||||
.ns
|
||||
.TP
|
||||
.BI -2 " m
|
||||
Join on the
|
||||
.IR m th
|
||||
field of
|
||||
.I file1
|
||||
or
|
||||
.IR file2 .
|
||||
.TP
|
||||
.BI -j "n m"
|
||||
Archaic equivalent for
|
||||
.BI - n " m"\f1.
|
||||
.TP
|
||||
.BI -o fields
|
||||
Each output line comprises the designated fields.
|
||||
The comma-separated field designators are either
|
||||
.BR 0 ,
|
||||
meaning the join field, or have the form
|
||||
.IR n . m ,
|
||||
where
|
||||
.I n
|
||||
is a file number and
|
||||
.I m
|
||||
is a field number.
|
||||
Archaic usage allows separate arguments for field designators.
|
||||
.PP
|
||||
.TP
|
||||
.BI -t c
|
||||
Use character
|
||||
.I c
|
||||
as the only separator (tab character) on input and output.
|
||||
Every appearance of
|
||||
.I c
|
||||
in a line is significant.
|
||||
.SH EXAMPLES
|
||||
.TP
|
||||
.L
|
||||
sort /etc/passwd | join -t: -1 1 -a 1 -e "" - bdays
|
||||
Add birthdays to the
|
||||
.B /etc/passwd
|
||||
file, leaving unknown
|
||||
birthdays empty.
|
||||
The layout of
|
||||
.B /etc/passwd
|
||||
is given in
|
||||
.IR passwd (5);
|
||||
.B bdays
|
||||
contains sorted lines like
|
||||
.LR "ken:Feb\ 4,\ 1953" .
|
||||
.TP
|
||||
.L
|
||||
tr : ' ' </etc/passwd | sort -k 3 3 >temp
|
||||
.br
|
||||
.ns
|
||||
.TP
|
||||
.L
|
||||
join -1 3 -2 3 -o 1.1,2.1 temp temp | awk '$1 < $2'
|
||||
Print all pairs of users with identical userids.
|
||||
.SH SOURCE
|
||||
.B \*9/src/cmd/join.c
|
||||
.SH "SEE ALSO"
|
||||
.IR sort (1),
|
||||
.IR comm (1),
|
||||
.IR awk (1)
|
||||
.SH BUGS
|
||||
With default field separation,
|
||||
the collating sequence is that of
|
||||
.BI "sort -b"
|
||||
.BI -k y , y\f1;
|
||||
with
|
||||
.BR -t ,
|
||||
the sequence is that of
|
||||
.BI "sort -t" x
|
||||
.BI -k y , y\f1.
|
||||
.PP
|
||||
One of the files must be randomly accessible.
|
369
join/join.c
Normal file
369
join/join.c
Normal file
@@ -0,0 +1,369 @@
|
||||
/* join F1 F2 on stuff */
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#define F1 0
|
||||
#define F2 1
|
||||
#define F0 3
|
||||
#define NFLD 100 /* max field per line */
|
||||
#define comp() runecmp(ppi[F1][j1],ppi[F2][j2])
|
||||
FILE *f[2];
|
||||
Rune buf[2][BUFSIZ]; /*input lines */
|
||||
Rune *ppi[2][NFLD+1]; /* pointers to fields in lines */
|
||||
Rune *s1,*s2;
|
||||
#define j1 joinj1
|
||||
#define j2 joinj2
|
||||
|
||||
int j1 = 1; /* join of this field of file 1 */
|
||||
int j2 = 1; /* join of this field of file 2 */
|
||||
int olist[2*NFLD]; /* output these fields */
|
||||
int olistf[2*NFLD]; /* from these files */
|
||||
int no; /* number of entries in olist */
|
||||
Rune sep1 = ' '; /* default field separator */
|
||||
Rune sep2 = '\t';
|
||||
char *sepstr=" ";
|
||||
int discard; /* count of truncated lines */
|
||||
Rune null[BUFSIZ]/* = L""*/;
|
||||
int a1;
|
||||
int a2;
|
||||
|
||||
char *getoptarg(int*, char***);
|
||||
void output(int, int);
|
||||
int input(int);
|
||||
void oparse(char*);
|
||||
void error(char*, char*);
|
||||
void seek1(void), seek2(void);
|
||||
Rune *strtorune(Rune *, char *);
|
||||
|
||||
|
||||
void
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
|
||||
while (argc > 1 && argv[1][0] == '-') {
|
||||
if (argv[1][1] == '\0')
|
||||
break;
|
||||
switch (argv[1][1]) {
|
||||
case '-':
|
||||
argc--;
|
||||
argv++;
|
||||
goto proceed;
|
||||
case 'a':
|
||||
switch(*getoptarg(&argc, &argv)) {
|
||||
case '1':
|
||||
a1++;
|
||||
break;
|
||||
case '2':
|
||||
a2++;
|
||||
break;
|
||||
default:
|
||||
error("incomplete option -a","");
|
||||
}
|
||||
break;
|
||||
case 'e':
|
||||
strtorune(null, getoptarg(&argc, &argv));
|
||||
break;
|
||||
case 't':
|
||||
sepstr=getoptarg(&argc, &argv);
|
||||
chartorune(&sep1, sepstr);
|
||||
sep2 = sep1;
|
||||
break;
|
||||
case 'o':
|
||||
if(argv[1][2]!=0 ||
|
||||
argc>2 && strchr(argv[2],',')!=0)
|
||||
oparse(getoptarg(&argc, &argv));
|
||||
else for (no = 0; no<2*NFLD && argc>2; no++){
|
||||
if (argv[2][0] == '1' && argv[2][1] == '.') {
|
||||
olistf[no] = F1;
|
||||
olist[no] = atoi(&argv[2][2]);
|
||||
} else if (argv[2][0] == '2' && argv[2][1] == '.') {
|
||||
olist[no] = atoi(&argv[2][2]);
|
||||
olistf[no] = F2;
|
||||
} else if (argv[2][0] == '0')
|
||||
olistf[no] = F0;
|
||||
else
|
||||
break;
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
break;
|
||||
case 'j':
|
||||
if(argc <= 2)
|
||||
break;
|
||||
if (argv[1][2] == '1')
|
||||
j1 = atoi(argv[2]);
|
||||
else if (argv[1][2] == '2')
|
||||
j2 = atoi(argv[2]);
|
||||
else
|
||||
j1 = j2 = atoi(argv[2]);
|
||||
argc--;
|
||||
argv++;
|
||||
break;
|
||||
case '1':
|
||||
j1 = atoi(getoptarg(&argc, &argv));
|
||||
break;
|
||||
case '2':
|
||||
j2 = atoi(getoptarg(&argc, &argv));
|
||||
break;
|
||||
}
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
proceed:
|
||||
for (i = 0; i < no; i++)
|
||||
if (olist[i]-- > NFLD) /* 0 origin */
|
||||
error("field number too big in -o","");
|
||||
if (argc != 3)
|
||||
error("usage: join [-1 x -2 y] [-o list] file1 file2","");
|
||||
j1--;
|
||||
j2--; /* everyone else believes in 0 origin */
|
||||
s1 = ppi[F1][j1];
|
||||
s2 = ppi[F2][j2];
|
||||
if (strcmp(argv[1], "-") == 0)
|
||||
f[F1] = stdin;
|
||||
else if ((f[F1] = fopen(argv[1], "r")) == 0)
|
||||
error("can't open %s", argv[1]);
|
||||
if(strcmp(argv[2], "-") == 0) {
|
||||
f[F2] = stdin;
|
||||
} else if ((f[F2] = fopen(argv[2], "r")) == 0)
|
||||
error("can't open %s", argv[2]);
|
||||
|
||||
if(ftell(f[F2]) >= 0)
|
||||
seek2();
|
||||
else if(ftell(f[F1]) >= 0)
|
||||
seek1();
|
||||
else
|
||||
error("neither file is randomly accessible","");
|
||||
if (discard)
|
||||
error("some input line was truncated", "");
|
||||
exits("");
|
||||
}
|
||||
/*
 * Compare two NUL-terminated rune strings.
 * Returns 0 when they are equal, -1 when a has the smaller rune at
 * the first position where they differ, and 1 otherwise.
 */
int
runecmp(Rune *a, Rune *b)
{
	for(; *a == *b; a++, b++)
		if(*a == '\0')
			return 0;
	return *a < *b ? -1 : 1;
}
|
||||
/*
 * Encode the NUL-terminated rune string r into buf with runetochar()
 * and terminate the result.  Returns buf.
 * The destination size is not checked.
 */
char *
runetostr(char *buf, Rune *r)
{
	char *out;

	out = buf;
	while(*r != 0){
		out += runetochar(out, r);
		r++;
	}
	*out = '\0';
	return buf;
}
|
||||
/*
 * Decode the NUL-terminated string s into buf with chartorune() and
 * terminate the result.  Returns buf.
 * The destination size is not checked.
 */
Rune *
strtorune(Rune *buf, char *s)
{
	Rune *out;

	out = buf;
	while(*s != 0){
		s += chartorune(out, s);
		out++;
	}
	*out = '\0';
	return buf;
}
|
||||
/* lazy. there ought to be a clean way to combine seek1 & seek2 */
|
||||
#define get1() n1=input(F1)
|
||||
#define get2() n2=input(F2)
|
||||
void
|
||||
seek2(void)
|
||||
{
|
||||
int n1, n2;
|
||||
int top2=0;
|
||||
int bot2 = ftell(f[F2]);
|
||||
get1();
|
||||
get2();
|
||||
while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
|
||||
if(n1>0 && n2>0 && comp()>0 || n1==0) {
|
||||
if(a2) output(0, n2);
|
||||
bot2 = ftell(f[F2]);
|
||||
get2();
|
||||
} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
|
||||
if(a1) output(n1, 0);
|
||||
get1();
|
||||
} else /*(n1>0 && n2>0 && comp()==0)*/ {
|
||||
while(n2>0 && comp()==0) {
|
||||
output(n1, n2);
|
||||
top2 = ftell(f[F2]);
|
||||
get2();
|
||||
}
|
||||
fseek(f[F2], bot2, 0);
|
||||
get2();
|
||||
get1();
|
||||
for(;;) {
|
||||
if(n1>0 && n2>0 && comp()==0) {
|
||||
output(n1, n2);
|
||||
get2();
|
||||
} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
|
||||
fseek(f[F2], bot2, 0);
|
||||
get2();
|
||||
get1();
|
||||
} else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
|
||||
fseek(f[F2], top2, 0);
|
||||
bot2 = top2;
|
||||
get2();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
void
|
||||
seek1(void)
|
||||
{
|
||||
int n1, n2;
|
||||
int top1=0;
|
||||
int bot1 = ftell(f[F1]);
|
||||
get1();
|
||||
get2();
|
||||
while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
|
||||
if(n1>0 && n2>0 && comp()>0 || n1==0) {
|
||||
if(a2) output(0, n2);
|
||||
get2();
|
||||
} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
|
||||
if(a1) output(n1, 0);
|
||||
bot1 = ftell(f[F1]);
|
||||
get1();
|
||||
} else /*(n1>0 && n2>0 && comp()==0)*/ {
|
||||
while(n2>0 && comp()==0) {
|
||||
output(n1, n2);
|
||||
top1 = ftell(f[F1]);
|
||||
get1();
|
||||
}
|
||||
fseek(f[F1], bot1, 0);
|
||||
get2();
|
||||
get1();
|
||||
for(;;) {
|
||||
if(n1>0 && n2>0 && comp()==0) {
|
||||
output(n1, n2);
|
||||
get1();
|
||||
} else if(n1>0 && n2>0 && comp()>0 || n1==0) {
|
||||
fseek(f[F1], bot1, 0);
|
||||
get2();
|
||||
get1();
|
||||
} else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{
|
||||
fseek(f[F1], top1, 0);
|
||||
bot1 = top1;
|
||||
get1();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
input(int n) /* get input line and split into fields */
|
||||
{
|
||||
register int i, c;
|
||||
Rune *bp;
|
||||
Rune **pp;
|
||||
char line[BUFSIZ];
|
||||
|
||||
bp = buf[n];
|
||||
pp = ppi[n];
|
||||
if (fgets(line, BUFSIZ, f[n]) == 0)
|
||||
return(0);
|
||||
strtorune(bp, line);
|
||||
i = 0;
|
||||
do {
|
||||
i++;
|
||||
if (sep1 == ' ') /* strip multiples */
|
||||
while ((c = *bp) == sep1 || c == sep2)
|
||||
bp++; /* skip blanks */
|
||||
*pp++ = bp; /* record beginning */
|
||||
while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
|
||||
bp++;
|
||||
*bp++ = '\0'; /* mark end by overwriting blank */
|
||||
} while (c != '\n' && c != '\0' && i < NFLD-1);
|
||||
if (c != '\n')
|
||||
discard++;
|
||||
|
||||
*pp = 0;
|
||||
return(i);
|
||||
}
|
||||
|
||||
void
|
||||
output(int on1, int on2) /* print items from olist */
|
||||
{
|
||||
int i;
|
||||
Rune *temp;
|
||||
char buf[BUFSIZ];
|
||||
|
||||
if (no <= 0) { /* default case */
|
||||
printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2]));
|
||||
for (i = 0; i < on1; i++)
|
||||
if (i != j1)
|
||||
printf("%s%s", sepstr, runetostr(buf, ppi[F1][i]));
|
||||
for (i = 0; i < on2; i++)
|
||||
if (i != j2)
|
||||
printf("%s%s", sepstr, runetostr(buf, ppi[F2][i]));
|
||||
printf("\n");
|
||||
} else {
|
||||
for (i = 0; i < no; i++) {
|
||||
if (olistf[i]==F0 && on1>j1)
|
||||
temp = ppi[F1][j1];
|
||||
else if (olistf[i]==F0 && on2>j2)
|
||||
temp = ppi[F2][j2];
|
||||
else {
|
||||
temp = ppi[olistf[i]][olist[i]];
|
||||
if(olistf[i]==F1 && on1<=olist[i] ||
|
||||
olistf[i]==F2 && on2<=olist[i] ||
|
||||
*temp==0)
|
||||
temp = null;
|
||||
}
|
||||
printf("%s", runetostr(buf, temp));
|
||||
if (i == no - 1)
|
||||
printf("\n");
|
||||
else
|
||||
printf("%s", sepstr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Print "join: " followed by the message on standard error and exit.
 * s1 is used as a printf format with s2 as its only argument, and is
 * also the exit status string.
 */
void
error(char *s1, char *s2)
{
	fputs("join: ", stderr);
	fprintf(stderr, s1, s2);
	fputc('\n', stderr);
	exits(s1);
}
|
||||
|
||||
/*
 * Return the argument of the option held in (*argvp)[1].
 * Text attached to the option letter is returned as is.  Otherwise
 * the following argument is consumed (the caller's argc and argv are
 * stepped by one) and returned; a missing argument, or one beginning
 * with '-', is reported through error().
 */
char *
getoptarg(int *argcp, char ***argvp)
{
	char **av;

	av = *argvp;
	if(av[1][2] != 0)
		return av[1] + 2;
	if(*argcp <= 2 || av[2][0] == '-')
		error("incomplete option %s", av[1]);
	*argcp = *argcp - 1;
	*argvp = av + 1;
	return av[2];
}
|
||||
|
||||
void
|
||||
oparse(char *s)
|
||||
{
|
||||
for (no = 0; no<2*NFLD && *s; no++, s++) {
|
||||
switch(*s) {
|
||||
case 0:
|
||||
return;
|
||||
case '0':
|
||||
olistf[no] = F0;
|
||||
break;
|
||||
case '1':
|
||||
case '2':
|
||||
if(s[1] == '.' && isdigit((uchar)s[2])) {
|
||||
olistf[no] = *s=='1'? F1: F2;
|
||||
olist[no] = atoi(s += 2);
|
||||
break;
|
||||
} /* fall thru */
|
||||
default:
|
||||
error("invalid -o list", "");
|
||||
}
|
||||
if(s[1] == ',')
|
||||
s++;
|
||||
}
|
||||
}
|
@@ -11,7 +11,8 @@ enum
|
||||
UTFmax = 3, /* maximum bytes per rune */
|
||||
Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
|
||||
Runeself = 0x80, /* rune and UTF sequences are the same (<) */
|
||||
Runeerror = 0xFFFD /* decoding error in UTF */
|
||||
Runeerror = 0xFFFD, /* decoding error in UTF */
|
||||
Runemax = 0x10FFFF /* maximum rune value */
|
||||
};
|
||||
|
||||
/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */
|
||||
|
10
look/Makefile
Normal file
10
look/Makefile
Normal file
@@ -0,0 +1,10 @@
|
||||
# look - look unix port from plan9
|
||||
# Depends on ../lib9
|
||||
|
||||
TARG = look
|
||||
|
||||
include ../std.mk
|
||||
|
||||
pre-uninstall:
|
||||
|
||||
post-install:
|
85
look/look.1
Normal file
85
look/look.1
Normal file
@@ -0,0 +1,85 @@
|
||||
.TH LOOK 1
|
||||
.SH NAME
|
||||
look \- find lines in a sorted list
|
||||
.SH SYNOPSIS
|
||||
.B look
|
||||
[
|
||||
.BI -dfnixt c
|
||||
]
|
||||
[
|
||||
.I string
|
||||
]
|
||||
[
|
||||
.I file
|
||||
]
|
||||
.SH DESCRIPTION
|
||||
.I Look
|
||||
consults a sorted
|
||||
.I file
|
||||
and prints all lines that begin with
|
||||
.IR string .
|
||||
It uses binary search.
|
||||
.PP
|
||||
The following options are recognized.
|
||||
Options
|
||||
.B dfnt
|
||||
affect comparisons as in
|
||||
.IR sort (1).
|
||||
.TP
|
||||
.B -i
|
||||
Interactive.
|
||||
There is no
|
||||
.I string
|
||||
argument; instead
|
||||
.I look
|
||||
takes lines from the standard input as strings to be looked up.
|
||||
.TP
|
||||
.B -x
|
||||
Exact.
|
||||
Print only lines of the file whose key matches
|
||||
.I string
|
||||
exactly.
|
||||
.TP
|
||||
.B -d
|
||||
`Directory' order:
|
||||
only letters, digits,
|
||||
tabs and blanks participate in comparisons.
|
||||
.TP
|
||||
.B -f
|
||||
Fold.
|
||||
Upper case letters compare equal to lower case.
|
||||
.TP
|
||||
.B -n
|
||||
Numeric comparison with initial string of digits, optional minus sign,
|
||||
and optional decimal point.
|
||||
.TP
|
||||
.BR -t [ \f2c\f1 ]
|
||||
Character
|
||||
.I c
|
||||
terminates the sort key in the
|
||||
.IR file .
|
||||
By default, tab terminates the key. If
|
||||
.I c
|
||||
is missing the entire line comprises the key.
|
||||
.PP
|
||||
If no
|
||||
.I file
|
||||
is specified,
|
||||
.B /lib/words
|
||||
is assumed, with collating sequence
|
||||
.BR df .
|
||||
.SH FILES
|
||||
.B /lib/words
|
||||
.SH SOURCE
|
||||
.B \*9/src/cmd/look.c
|
||||
.SH "SEE ALSO"
|
||||
.IR sort (1),
|
||||
.IR grep (1)
|
||||
.SH DIAGNOSTICS
|
||||
The exit status is
|
||||
.RB `` "not found" ''
|
||||
if no match is found, and
|
||||
.RB `` "no dictionary" ''
|
||||
if
|
||||
.I file
|
||||
or the default dictionary cannot be opened.
|
349
look/look.c
Normal file
349
look/look.c
Normal file
@@ -0,0 +1,349 @@
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
/* Macros for Rune support of ctype.h-like functions */
|
||||
|
||||
#undef isupper
|
||||
#undef islower
|
||||
#undef isalpha
|
||||
#undef isdigit
|
||||
#undef isalnum
|
||||
#undef isspace
|
||||
#undef tolower
|
||||
#define isupper(r) ('A' <= (r) && (r) <= 'Z')
|
||||
#define islower(r) ('a' <= (r) && (r) <= 'z')
|
||||
#define isalpha(r) (isupper(r) || islower(r))
|
||||
#define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF)
|
||||
|
||||
#define isdigit(r) ('0' <= (r) && (r) <= '9')
|
||||
|
||||
#define isalnum(r) (isalpha(r) || isdigit(r))
|
||||
|
||||
#define isspace(r) ((r) == ' ' || (r) == '\t' \
|
||||
|| (0x0A <= (r) && (r) <= 0x0D))
|
||||
|
||||
#define tolower(r) ((r)-'A'+'a')
|
||||
|
||||
#define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))
|
||||
|
||||
#define WORDSIZ 4000
|
||||
char *filename = "#9/lib/words";
|
||||
Biobuf *dfile;
|
||||
Biobuf bout;
|
||||
Biobuf bin;
|
||||
|
||||
int fold;
|
||||
int direc;
|
||||
int exact;
|
||||
int iflag;
|
||||
int rev = 1; /*-1 for reverse-ordered file, not implemented*/
|
||||
int (*compare)(Rune*, Rune*);
|
||||
Rune tab = '\t';
|
||||
Rune entry[WORDSIZ];
|
||||
Rune word[WORDSIZ];
|
||||
Rune key[50], orig[50];
|
||||
Rune latin_fold_tab[] =
|
||||
{
|
||||
/* Table to fold latin 1 characters to ASCII equivalents
|
||||
based at Rune value 0xc0
|
||||
|
||||
À Á Â Ã Ä Å Æ Ç
|
||||
È É Ê Ë Ì Í Î Ï
|
||||
Ð Ñ Ò Ó Ô Õ Ö ×
|
||||
Ø Ù Ú Û Ü Ý Þ ß
|
||||
à á â ã ä å æ ç
|
||||
è é ê ë ì í î ï
|
||||
ð ñ ò ó ô õ ö ÷
|
||||
ø ù ú û ü ý þ ÿ
|
||||
*/
|
||||
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
|
||||
'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
|
||||
'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
|
||||
'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
|
||||
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
|
||||
'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
|
||||
'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
|
||||
'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',
|
||||
};
|
||||
|
||||
int locate(void);
|
||||
int acomp(Rune*, Rune*);
|
||||
int getword(Biobuf*, Rune *rp, int n);
|
||||
void torune(char*, Rune*);
|
||||
void rcanon(Rune*, Rune*);
|
||||
int ncomp(Rune*, Rune*);
|
||||
|
||||
void
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int n;
|
||||
|
||||
filename = unsharp(filename);
|
||||
|
||||
Binit(&bin, 0, OREAD);
|
||||
Binit(&bout, 1, OWRITE);
|
||||
compare = acomp;
|
||||
ARGBEGIN{
|
||||
case 'd':
|
||||
direc++;
|
||||
break;
|
||||
case 'f':
|
||||
fold++;
|
||||
break;
|
||||
case 'i':
|
||||
iflag++;
|
||||
break;
|
||||
case 'n':
|
||||
compare = ncomp;
|
||||
break;
|
||||
case 't':
|
||||
chartorune(&tab,ARGF());
|
||||
break;
|
||||
case 'x':
|
||||
exact++;
|
||||
break;
|
||||
default:
|
||||
fprint(2, "%s: bad option %c\n", argv0, ARGC());
|
||||
fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);
|
||||
exits("usage");
|
||||
} ARGEND
|
||||
if(!iflag){
|
||||
if(argc >= 1) {
|
||||
torune(argv[0], orig);
|
||||
argv++;
|
||||
argc--;
|
||||
} else
|
||||
iflag++;
|
||||
}
|
||||
if(argc < 1) {
|
||||
direc++;
|
||||
fold++;
|
||||
} else
|
||||
filename = argv[0];
|
||||
if (!iflag)
|
||||
rcanon(orig, key);
|
||||
dfile = Bopen(filename, OREAD);
|
||||
if(dfile == 0) {
|
||||
fprint(2, "look: can't open %s\n", filename);
|
||||
exits("no dictionary");
|
||||
}
|
||||
if(!iflag)
|
||||
if(!locate())
|
||||
exits("not found");
|
||||
do {
|
||||
if(iflag) {
|
||||
Bflush(&bout);
|
||||
if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
|
||||
exits(0);
|
||||
rcanon(orig, key);
|
||||
if(!locate())
|
||||
continue;
|
||||
}
|
||||
if (!exact || !acomp(word, key))
|
||||
Bprint(&bout, "%S\n", entry);
|
||||
while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
|
||||
rcanon(entry, word);
|
||||
n = compare(key, word);
|
||||
switch(n) {
|
||||
case -1:
|
||||
if(exact)
|
||||
break;
|
||||
case 0:
|
||||
if (!exact || !acomp(word, orig))
|
||||
Bprint(&bout, "%S\n", entry);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
} while(iflag);
|
||||
exits(0);
|
||||
}
|
||||
|
||||
int
|
||||
locate(void)
|
||||
{
|
||||
vlong top, bot, mid;
|
||||
int c;
|
||||
int n;
|
||||
|
||||
bot = 0;
|
||||
top = Bseek(dfile, 0L, 2);
|
||||
for(;;) {
|
||||
mid = (top+bot) / 2;
|
||||
Bseek(dfile, mid, 0);
|
||||
do
|
||||
c = Bgetrune(dfile);
|
||||
while(c>=0 && c!='\n');
|
||||
mid = Boffset(dfile);
|
||||
if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
|
||||
break;
|
||||
rcanon(entry, word);
|
||||
n = compare(key, word);
|
||||
switch(n) {
|
||||
case -2:
|
||||
case -1:
|
||||
case 0:
|
||||
if(top <= mid)
|
||||
break;
|
||||
top = mid;
|
||||
continue;
|
||||
case 1:
|
||||
case 2:
|
||||
bot = mid;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
Bseek(dfile, bot, 0);
|
||||
while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
|
||||
rcanon(entry, word);
|
||||
n = compare(key, word);
|
||||
switch(n) {
|
||||
case -2:
|
||||
return 0;
|
||||
case -1:
|
||||
if(exact)
|
||||
return 0;
|
||||
case 0:
|
||||
return 1;
|
||||
case 1:
|
||||
case 2:
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* acomp(s, t) returns:
|
||||
* -2 if s strictly precedes t
|
||||
* -1 if s is a prefix of t
|
||||
* 0 if s is the same as t
|
||||
* 1 if t is a prefix of s
|
||||
* 2 if t strictly precedes s
|
||||
*/
|
||||
|
||||
int
|
||||
acomp(Rune *s, Rune *t)
|
||||
{
|
||||
int cs, ct;
|
||||
|
||||
for(;;) {
|
||||
cs = *s;
|
||||
ct = *t;
|
||||
if(cs != ct)
|
||||
break;
|
||||
if(cs == 0)
|
||||
return 0;
|
||||
s++;
|
||||
t++;
|
||||
}
|
||||
if(cs == 0)
|
||||
return -1;
|
||||
if(ct == 0)
|
||||
return 1;
|
||||
if(cs < ct)
|
||||
return -2;
|
||||
return 2;
|
||||
}
|
||||
|
||||
void
|
||||
torune(char *old, Rune *new)
|
||||
{
|
||||
do old += chartorune(new, old);
|
||||
while(*new++);
|
||||
}
|
||||
|
||||
void
|
||||
rcanon(Rune *old, Rune *new)
|
||||
{
|
||||
Rune r;
|
||||
|
||||
while((r = *old++) && r != tab) {
|
||||
if (islatin1(r) && latin_fold_tab[r-0xc0])
|
||||
r = latin_fold_tab[r-0xc0];
|
||||
if(direc)
|
||||
if(!(isalnum(r) || r == ' ' || r == '\t'))
|
||||
continue;
|
||||
if(fold)
|
||||
if(isupper(r))
|
||||
r = tolower(r);
|
||||
*new++ = r;
|
||||
}
|
||||
*new = 0;
|
||||
}
|
||||
|
||||
int
|
||||
ncomp(Rune *s, Rune *t)
|
||||
{
|
||||
Rune *is, *it, *js, *jt;
|
||||
int a, b;
|
||||
int ssgn, tsgn;
|
||||
|
||||
while(isspace(*s))
|
||||
s++;
|
||||
while(isspace(*t))
|
||||
t++;
|
||||
ssgn = tsgn = -2*rev;
|
||||
if(*s == '-') {
|
||||
s++;
|
||||
ssgn = -ssgn;
|
||||
}
|
||||
if(*t == '-') {
|
||||
t++;
|
||||
tsgn = -tsgn;
|
||||
}
|
||||
for(is = s; isdigit(*is); is++)
|
||||
;
|
||||
for(it = t; isdigit(*it); it++)
|
||||
;
|
||||
js = is;
|
||||
jt = it;
|
||||
a = 0;
|
||||
if(ssgn == tsgn)
|
||||
while(it>t && is>s)
|
||||
if(b = *--it - *--is)
|
||||
a = b;
|
||||
while(is > s)
|
||||
if(*--is != '0')
|
||||
return -ssgn;
|
||||
while(it > t)
|
||||
if(*--it != '0')
|
||||
return tsgn;
|
||||
if(a)
|
||||
return sgn(a)*ssgn;
|
||||
if(*(s=js) == '.')
|
||||
s++;
|
||||
if(*(t=jt) == '.')
|
||||
t++;
|
||||
if(ssgn == tsgn)
|
||||
while(isdigit(*s) && isdigit(*t))
|
||||
if(a = *t++ - *s++)
|
||||
return sgn(a)*ssgn;
|
||||
while(isdigit(*s))
|
||||
if(*s++ != '0')
|
||||
return -ssgn;
|
||||
while(isdigit(*t))
|
||||
if(*t++ != '0')
|
||||
return tsgn;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
getword(Biobuf *f, Rune *rp, int n)
|
||||
{
|
||||
long c;
|
||||
|
||||
while(n-- > 0) {
|
||||
c = Bgetrune(f);
|
||||
if(c < 0)
|
||||
return 0;
|
||||
if(c == '\n') {
|
||||
*rp = '\0';
|
||||
return 1;
|
||||
}
|
||||
*rp++ = c;
|
||||
}
|
||||
fprint(2, "Look: word too long. Bailing out.\n");
|
||||
return 0;
|
||||
}
|
10
pbd/Makefile
Normal file
10
pbd/Makefile
Normal file
@@ -0,0 +1,10 @@
|
||||
# pbd - pbd unix port from plan9
|
||||
# Depends on ../lib9
|
||||
|
||||
TARG = pbd
|
||||
|
||||
include ../std.mk
|
||||
|
||||
pre-uninstall:
|
||||
|
||||
post-install:
|
19
pbd/pbd.c
Normal file
19
pbd/pbd.c
Normal file
@@ -0,0 +1,19 @@
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
|
||||
void
|
||||
main(void)
|
||||
{
|
||||
char buf[512], *p;
|
||||
|
||||
p = "???";
|
||||
if(getwd(buf, sizeof buf)){
|
||||
p = strrchr(buf, '/');
|
||||
if(p == nil)
|
||||
p = buf;
|
||||
else if(p>buf || p[1]!='\0')
|
||||
p++;
|
||||
}
|
||||
write(1, p, strlen(p));
|
||||
exits(0);
|
||||
}
|
@@ -46,7 +46,7 @@ uninstall:
|
||||
@${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include -I../lib9 $*.c
|
||||
|
||||
clean:
|
||||
rm -f ${OFILES} ${TARG} y.tab.c y.tab.h
|
||||
rm -f ${OFILES} ${TARG} y.tab.c y.tab.h x.tab.h
|
||||
|
||||
${TARG}: ${OFILES}
|
||||
@echo LD ${TARG}
|
||||
|
10
split/Makefile
Normal file
10
split/Makefile
Normal file
@@ -0,0 +1,10 @@
|
||||
# split - split unix port from plan9
|
||||
# Depends on ../lib9
|
||||
|
||||
TARG = split
|
||||
|
||||
include ../std.mk
|
||||
|
||||
pre-uninstall:
|
||||
|
||||
post-install:
|
82
split/split.1
Normal file
82
split/split.1
Normal file
@@ -0,0 +1,82 @@
|
||||
.TH SPLIT 1
|
||||
.CT 1 files
|
||||
.SH NAME
|
||||
split \- split a file into pieces
|
||||
.SH SYNOPSIS
|
||||
.B split
|
||||
[
|
||||
.I option ...
|
||||
]
|
||||
[
|
||||
.I file
|
||||
]
|
||||
.SH DESCRIPTION
|
||||
.I Split
|
||||
reads
|
||||
.I file
|
||||
(standard input by default)
|
||||
and writes it in pieces of 1000
|
||||
lines per output file.
|
||||
The names of the
|
||||
output files are
|
||||
.BR xaa ,
|
||||
.BR xab ,
|
||||
and so on to
|
||||
.BR xzz .
|
||||
The options are
|
||||
.TP
|
||||
.BI -n " n"
|
||||
Split into
|
||||
.IR n -line
|
||||
pieces.
|
||||
.TP
|
||||
.BI -l " n"
|
||||
Synonym for
|
||||
.B -n
|
||||
.IR n ,
|
||||
a nod to Unix's syntax.
|
||||
.TP
|
||||
.BI -e " expression"
|
||||
File divisions occur at each line
|
||||
that matches a regular
|
||||
.IR expression ;
|
||||
see
|
||||
.IR regexp (7).
|
||||
Multiple
|
||||
.B -e
|
||||
options may appear.
|
||||
If a subexpression of
|
||||
.I expression
|
||||
is contained in parentheses
|
||||
.BR ( ... ) ,
|
||||
the output file name is the portion of the
|
||||
line which matches the subexpression.
|
||||
.TP
|
||||
.BI -f " stem
|
||||
Use
|
||||
.I stem
|
||||
instead of
|
||||
.B x
|
||||
in output file names.
|
||||
.TP
|
||||
.BI -s " suffix
|
||||
Append
|
||||
.I suffix
|
||||
to names identified under
|
||||
.BR -e .
|
||||
.TP
|
||||
.B -x
|
||||
Exclude the matched input line from the output file.
|
||||
.TP
|
||||
.B -i
|
||||
Ignore case in option
|
||||
.BR -e ;
|
||||
force output file names (excluding the suffix)
|
||||
to lower case.
|
||||
.SH SOURCE
|
||||
.B \*9/src/cmd/split.c
|
||||
.SH SEE ALSO
|
||||
.IR sed (1),
|
||||
.IR awk (1),
|
||||
.IR grep (1),
|
||||
.IR regexp (7)
|
189
split/split.c
Normal file
189
split/split.c
Normal file
@@ -0,0 +1,189 @@
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include <ctype.h>
|
||||
#include <regexp.h>
|
||||
|
||||
char digit[] = "0123456789";
|
||||
char *suffix = "";
|
||||
char *stem = "x";
|
||||
char suff[] = "aa";
|
||||
char name[200];
|
||||
Biobuf bout;
|
||||
Biobuf *output = &bout;
|
||||
|
||||
extern int nextfile(void);
|
||||
extern int matchfile(Resub*);
|
||||
extern void openf(void);
|
||||
extern char *fold(char*,int);
|
||||
extern void usage(void);
|
||||
extern void badexp(void);
|
||||
|
||||
void
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
Reprog *exp;
|
||||
char *pattern = 0;
|
||||
int n = 1000;
|
||||
char *line;
|
||||
int xflag = 0;
|
||||
int iflag = 0;
|
||||
Biobuf bin;
|
||||
Biobuf *b = &bin;
|
||||
char buf[256];
|
||||
|
||||
ARGBEGIN {
|
||||
case 'l':
|
||||
case 'n':
|
||||
n=atoi(EARGF(usage()));
|
||||
break;
|
||||
case 'e':
|
||||
pattern = strdup(EARGF(usage()));
|
||||
break;
|
||||
case 'f':
|
||||
stem = strdup(EARGF(usage()));
|
||||
break;
|
||||
case 's':
|
||||
suffix = strdup(EARGF(usage()));
|
||||
break;
|
||||
case 'x':
|
||||
xflag++;
|
||||
break;
|
||||
case 'i':
|
||||
iflag++;
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
break;
|
||||
|
||||
} ARGEND;
|
||||
|
||||
if(argc < 0 || argc > 1)
|
||||
usage();
|
||||
|
||||
if(argc != 0) {
|
||||
b = Bopen(argv[0], OREAD);
|
||||
if(b == nil) {
|
||||
fprint(2, "split: can't open %s: %r\n", argv[0]);
|
||||
exits("open");
|
||||
}
|
||||
} else
|
||||
Binit(b, 0, OREAD);
|
||||
|
||||
if(pattern) {
|
||||
if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern)))
|
||||
badexp();
|
||||
while((line=Brdline(b,'\n')) != 0) {
|
||||
Resub match[2];
|
||||
memset(match, 0, sizeof match);
|
||||
line[Blinelen(b)-1] = 0;
|
||||
if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,match,2)) {
|
||||
if(matchfile(match) && xflag)
|
||||
continue;
|
||||
} else if(output == 0)
|
||||
nextfile(); /* at most once */
|
||||
Bwrite(output, line, Blinelen(b)-1);
|
||||
Bputc(output, '\n');
|
||||
}
|
||||
} else {
|
||||
int linecnt = n;
|
||||
|
||||
while((line=Brdline(b,'\n')) != 0) {
|
||||
if(++linecnt > n) {
|
||||
nextfile();
|
||||
linecnt = 1;
|
||||
}
|
||||
Bwrite(output, line, Blinelen(b));
|
||||
}
|
||||
|
||||
/*
|
||||
* in case we didn't end with a newline, tack whatever's
|
||||
* left onto the last file
|
||||
*/
|
||||
while((n = Bread(b, buf, sizeof(buf))) > 0)
|
||||
Bwrite(output, buf, n);
|
||||
}
|
||||
if(b != nil)
|
||||
Bterm(b);
|
||||
exits(0);
|
||||
}
|
||||
|
||||
int
|
||||
nextfile(void)
|
||||
{
|
||||
static int canopen = 1;
|
||||
if(suff[0] > 'z') {
|
||||
if(canopen)
|
||||
fprint(2, "split: file %szz not split\n",stem);
|
||||
canopen = 0;
|
||||
} else {
|
||||
strcpy(name, stem);
|
||||
strcat(name, suff);
|
||||
if(++suff[1] > 'z')
|
||||
suff[1] = 'a', ++suff[0];
|
||||
openf();
|
||||
}
|
||||
return canopen;
|
||||
}
|
||||
|
||||
int
|
||||
matchfile(Resub *match)
|
||||
{
|
||||
if(match[1].s.sp) {
|
||||
int len = match[1].e.ep - match[1].s.sp;
|
||||
strncpy(name, match[1].s.sp, len);
|
||||
strcpy(name+len, suffix);
|
||||
openf();
|
||||
return 1;
|
||||
}
|
||||
return nextfile();
|
||||
}
|
||||
|
||||
void
|
||||
openf(void)
|
||||
{
|
||||
static int fd = 0;
|
||||
Bflush(output);
|
||||
Bterm(output);
|
||||
if(fd > 0)
|
||||
close(fd);
|
||||
fd = create(name,OWRITE,0666);
|
||||
if(fd < 0) {
|
||||
fprint(2, "grep: can't create %s: %r\n", name);
|
||||
exits("create");
|
||||
}
|
||||
Binit(output, fd, OWRITE);
|
||||
}
|
||||
|
||||
char *
|
||||
fold(char *s, int n)
|
||||
{
|
||||
static char *fline;
|
||||
static int linesize = 0;
|
||||
char *t;
|
||||
|
||||
if(linesize < n+1){
|
||||
fline = realloc(fline,n+1);
|
||||
linesize = n+1;
|
||||
}
|
||||
for(t=fline; *t++ = tolower((uchar)*s++); )
|
||||
continue;
|
||||
/* we assume the 'A'-'Z' only appear as themselves
|
||||
* in a utf encoding.
|
||||
*/
|
||||
return fline;
|
||||
}
|
||||
|
||||
/*
 * Print the command synopsis on standard error and exit with
 * status "usage".
 */
void
usage(void)
{
	static char synopsis[] =
		"usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n";

	fprint(2, "%s", synopsis);
	exits("usage");
}
|
||||
|
||||
/*
 * Report that the -e expression could not be compiled and exit
 * with status "bad regular expression".
 */
void
badexp(void)
{
	static char complaint[] = "split: bad regular expression\n";

	fprint(2, "%s", complaint);
	exits("bad regular expression");
}
|
10
strings/Makefile
Normal file
10
strings/Makefile
Normal file
@@ -0,0 +1,10 @@
|
||||
# strings - strings unix port from plan9
|
||||
# Depends on ../lib9
|
||||
|
||||
TARG = strings
|
||||
|
||||
include ../std.mk
|
||||
|
||||
pre-uninstall:
|
||||
|
||||
post-install:
|
28
strings/strings.1
Normal file
28
strings/strings.1
Normal file
@@ -0,0 +1,28 @@
|
||||
.TH STRINGS 1
|
||||
.SH NAME
|
||||
strings \- extract printable strings
|
||||
.SH SYNOPSIS
|
||||
.B strings
|
||||
[
|
||||
.I file ...
|
||||
]
|
||||
.SH DESCRIPTION
|
||||
.I Strings
|
||||
finds and prints strings containing 6 or more
|
||||
consecutive printable UTF-encoded characters
|
||||
in a (typically) binary file, default
|
||||
standard input.
|
||||
Printable characters are taken to be
|
||||
.SM ASCII
|
||||
characters from blank through tilde (hexadecimal 20 through 7E), inclusive,
|
||||
and
|
||||
all other characters from value 00A0 to FFFF.
|
||||
Strings reports
|
||||
the decimal offset within the file at which the string starts and the text
|
||||
of the string. If the string is longer than 70 runes the line is
|
||||
terminated by three dots and the printing is resumed on the next
|
||||
line with the offset of the continuation line.
|
||||
.SH SOURCE
|
||||
.B \*9/src/cmd/strings.c
|
||||
.SH SEE ALSO
|
||||
.IR nm (1)
|
90
strings/strings.c
Normal file
90
strings/strings.c
Normal file
@@ -0,0 +1,90 @@
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
|
||||
Biobuf *fin;
|
||||
Biobuf fout;
|
||||
|
||||
#define MINSPAN 6 /* Min characters in string */
|
||||
|
||||
#define BUFSIZE 70
|
||||
|
||||
void stringit(char *);
|
||||
#undef isprint
|
||||
#define isprint risprint
|
||||
int isprint(Rune);
|
||||
|
||||
void
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
|
||||
Binit(&fout, 1, OWRITE);
|
||||
if(argc < 2) {
|
||||
stringit("/dev/stdin");
|
||||
exits(0);
|
||||
}
|
||||
|
||||
for(i = 1; i < argc; i++) {
|
||||
if(argc > 2)
|
||||
print("%s:\n", argv[i]);
|
||||
|
||||
stringit(argv[i]);
|
||||
}
|
||||
|
||||
exits(0);
|
||||
}
|
||||
|
||||
void
|
||||
stringit(char *str)
|
||||
{
|
||||
long posn, start;
|
||||
int cnt = 0;
|
||||
long c;
|
||||
|
||||
Rune buf[BUFSIZE];
|
||||
|
||||
if ((fin = Bopen(str, OREAD)) == 0) {
|
||||
perror("open");
|
||||
return;
|
||||
}
|
||||
|
||||
start = 0;
|
||||
posn = Boffset(fin);
|
||||
while((c = Bgetrune(fin)) >= 0) {
|
||||
if(isprint(c)) {
|
||||
if(start == 0)
|
||||
start = posn;
|
||||
buf[cnt++] = c;
|
||||
if(cnt == BUFSIZE-1) {
|
||||
buf[cnt] = 0;
|
||||
Bprint(&fout, "%8ld: %S ...\n", start, buf);
|
||||
start = 0;
|
||||
cnt = 0;
|
||||
}
|
||||
} else {
|
||||
if(cnt >= MINSPAN) {
|
||||
buf[cnt] = 0;
|
||||
Bprint(&fout, "%8ld: %S\n", start, buf);
|
||||
}
|
||||
start = 0;
|
||||
cnt = 0;
|
||||
}
|
||||
posn = Boffset(fin);
|
||||
}
|
||||
|
||||
if(cnt >= MINSPAN){
|
||||
buf[cnt] = 0;
|
||||
Bprint(&fout, "%8ld: %S\n", start, buf);
|
||||
}
|
||||
Bterm(fin);
|
||||
}
|
||||
|
||||
int
|
||||
isprint(Rune r)
|
||||
{
|
||||
if ((r >= ' ' && r <0x7f) || r > 0xA0)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
10
unicode/Makefile
Normal file
10
unicode/Makefile
Normal file
@@ -0,0 +1,10 @@
|
||||
# unicode - unicode unix port from plan9
|
||||
# Depends on ../lib9
|
||||
|
||||
TARG = unicode
|
||||
|
||||
include ../std.mk
|
||||
|
||||
pre-uninstall:
|
||||
|
||||
post-install:
|
0
unicode/unicode.1
Normal file
0
unicode/unicode.1
Normal file
122
unicode/unicode.c
Normal file
122
unicode/unicode.c
Normal file
@@ -0,0 +1,122 @@
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
|
||||
char usage[] = "unicode { [-t] hex hex ... | hexmin-hexmax ... | [-n] char ... }";
|
||||
char hex[] = "0123456789abcdefABCDEF";
|
||||
int numout = 0;
|
||||
int text = 0;
|
||||
char *err;
|
||||
Biobuf bout;
|
||||
|
||||
char *range(char*[]);
|
||||
char *nums(char*[]);
|
||||
char *chars(char*[]);
|
||||
|
||||
void
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
ARGBEGIN{
|
||||
case 'n':
|
||||
numout = 1;
|
||||
break;
|
||||
case 't':
|
||||
text = 1;
|
||||
break;
|
||||
}ARGEND
|
||||
Binit(&bout, 1, OWRITE);
|
||||
if(argc == 0){
|
||||
fprint(2, "usage: %s\n", usage);
|
||||
exits("usage");
|
||||
}
|
||||
if(!numout && utfrune(argv[0], '-'))
|
||||
exits(range(argv));
|
||||
if(numout || strchr(hex, argv[0][0])==0)
|
||||
exits(nums(argv));
|
||||
exits(chars(argv));
|
||||
}
|
||||
|
||||
char*
|
||||
range(char *argv[])
|
||||
{
|
||||
char *q;
|
||||
int min, max;
|
||||
int i;
|
||||
|
||||
while(*argv){
|
||||
q = *argv;
|
||||
if(strchr(hex, q[0]) == 0){
|
||||
err:
|
||||
fprint(2, "unicode: bad range %s\n", *argv);
|
||||
return "bad range";
|
||||
}
|
||||
min = strtoul(q, &q, 16);
|
||||
if(min<0 || min>Runemax || *q!='-')
|
||||
goto err;
|
||||
q++;
|
||||
if(strchr(hex, *q) == 0)
|
||||
goto err;
|
||||
max = strtoul(q, &q, 16);
|
||||
if(max<0 || max>Runemax || max<min || *q!=0)
|
||||
goto err;
|
||||
i = 0;
|
||||
do{
|
||||
Bprint(&bout, "%.4x %C", min, min);
|
||||
i++;
|
||||
if(min==max || (i&7)==0)
|
||||
Bprint(&bout, "\n");
|
||||
else
|
||||
Bprint(&bout, "\t");
|
||||
min++;
|
||||
}while(min<=max);
|
||||
argv++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
char*
|
||||
nums(char *argv[])
|
||||
{
|
||||
char *q;
|
||||
Rune r;
|
||||
int w;
|
||||
|
||||
while(*argv){
|
||||
q = *argv;
|
||||
while(*q){
|
||||
w = chartorune(&r, q);
|
||||
if(r==0x80 && (q[0]&0xFF)!=0x80){
|
||||
fprint(2, "unicode: invalid utf string %s\n", *argv);
|
||||
return "bad utf";
|
||||
}
|
||||
Bprint(&bout, "%.4x\n", r);
|
||||
q += w;
|
||||
}
|
||||
argv++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
char*
|
||||
chars(char *argv[])
|
||||
{
|
||||
char *q;
|
||||
int m;
|
||||
|
||||
while(*argv){
|
||||
q = *argv;
|
||||
if(strchr(hex, q[0]) == 0){
|
||||
err:
|
||||
fprint(2, "unicode: bad unicode value %s\n", *argv);
|
||||
return "bad char";
|
||||
}
|
||||
m = strtoul(q, &q, 16);
|
||||
if(m<0 || m>Runemax || *q!=0)
|
||||
goto err;
|
||||
Bprint(&bout, "%C", m);
|
||||
if(!text)
|
||||
Bprint(&bout, "\n");
|
||||
argv++;
|
||||
}
|
||||
return 0;
|
||||
}
|
10
unutf/Makefile
Normal file
10
unutf/Makefile
Normal file
@@ -0,0 +1,10 @@
|
||||
# unutf - unutf unix port from plan9
|
||||
# Depends on ../lib9
|
||||
|
||||
TARG = unutf
|
||||
|
||||
include ../std.mk
|
||||
|
||||
pre-uninstall:
|
||||
|
||||
post-install:
|
0
unutf/unutf.1
Normal file
0
unutf/unutf.1
Normal file
20
unutf/unutf.c
Normal file
20
unutf/unutf.c
Normal file
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
* stupid little program to pipe unicode chars through
|
||||
* when converting to non-utf compilers.
|
||||
*/
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
|
||||
Biobuf bin;
|
||||
|
||||
void
|
||||
main(void)
|
||||
{
|
||||
int c;
|
||||
|
||||
Binit(&bin, 0, OREAD);
|
||||
while((c = Bgetrune(&bin)) >= 0)
|
||||
print("0x%ux\n", c);
|
||||
exits(0);
|
||||
}
|
Reference in New Issue
Block a user