[sci.lang.japan] Latest version of VALUES.C (with automatic KANJI code detection)
Ken R. Lunde
KLUNDE at VMS.MACC.WISC.EDU
Tue Oct 16 08:14:27 AEST 1990
Archive-name: kanji-values/13-Oct-90
Original-posting-by: KLUNDE at VMS.MACC.WISC.EDU ("Ken R. Lunde")
Original-subject: Latest version of VALUES.C (with automatic KANJI code detection)
Reposted-by: emv at math.lsa.umich.edu (Edward Vielmetti)
[Reposted from sci.lang.japan.
Comments on this service to emv at math.lsa.umich.edu (Edward Vielmetti).]
/* VALUES.C version of 14 October 1990 */
/* A utility for displaying the values of Japanese characters. */
/* Written by Ken R. Lunde, University of Wisconsin-Madison */
/* EMAIL: klunde at vms.macc.wisc.edu */
/* Available at the ucdavis.edu (128.120.2.1) FTP archive in pub/JIS/C. */
/* I do not consider myself to be a very advanced programmer, but perhaps one */
/* other person may have a use for this program. Please feel free to use this */
/* source code any way you wish. The conversion algorithms for the major codes */
/* for Japanese are used, and are very reusable. The algorithm which detects */
/* the input file's Japanese code automatically is also quite useful. */
/* This program was written as a tool for determining the values for Japanese */
/* and ASCII characters. It is written in ANSI C, so should be compilable on */
/* almost any platform, but I do not offer any guarantees. :-) */
/* This version accepts SHIFT-JIS, EUC, or the 7-bit JIS codes as valid input */
/* for the file it reads. This program automatically detects which KANJI code */
/* is used in the input file. The output file will use the same code that the */
/* input file used. */
/* This program creates a file containing the contents of the input file, and */
/* displays each character's SHIFT-JIS, EUC, and JIS values in one of three */
/* different styles: octal, decimal, or hexadecimal -- the user must specify */
/* which one to use. ASCII and KUTEN values are also given. A tab separates */
/* the fields in the output file. I find that a tab width of 14 characters is */
/* best when printing. The SJIS, EUC, and JIS columns are padded with zeros */
/* for octal and decimal output. This makes the output more "readable." */
/* For SHIFT-JIS input files only, half-size KATAKANA are treated. Only their */
/* ASCII value is displayed since they are single-byte characters. Printable */
/* ASCII characters are handled with all the Japanese codes. */
/* Please send comments and suggestions! ENJOY! */
#include <stdio.h>
/* Function prototypes. exit() is declared by hand because only <stdio.h> */
/* is included; fclose() is re-declared here as well. */
int DetectCodeType(FILE *in);
int fclose(FILE *fp);
int isodd(int number);
void exit(int data);
void Introduction(FILE *out,int choice,int code);
void print1byte(FILE *out,int choice,int one);
void print2byte(FILE *out,int code,int choice,int one,int two,int data[8]);
void seven2shift(int *ptr1,int *ptr2);
void shift2seven(int *ptr1,int *ptr2);
void Skip_ESC_Seq(FILE *in,int data,int *ptr);
void TreatEUC(FILE *in,FILE *out,int code,int choice);
void TreatJIS(FILE *in,FILE *out,int code,int choice);
void TreatSJIS(FILE *in,FILE *out,int code,int choice);
/* Input code types returned by DetectCodeType(). NOT_SET means that no */
/* KANJI code was recognized; NEW, OLD and NEC are the three 7-bit JIS */
/* variants, all of which main() hands to TreatJIS(). */
#define NOT_SET 0
#define NEW 1
#define OLD 2
#define NEC 3
#define EUC 4
#define SJIS 5
/* Boolean values, used for the shifted-in flag of the JIS reader. */
#define TRUE 1
#define FALSE 0
/* The escape character that introduces every JIS escape sequence. */
#define ESC 27
/* Indices into the 8-element data[] array handed to print2byte(): first */
/* and second byte of the SHIFT-JIS, EUC and JIS values, then the KUTEN pair. */
#define SJIS1 0
#define SJIS2 1
#define EUC1 2
#define EUC2 3
#define JIS1 4
#define JIS2 5
#define KT1 6
#define KT2 7
/* Output styles; these are the numbers the user types at the prompt in main(). */
#define OCT 8
#define DEC 10
#define HEX 16
/* Characters that follow ESC in the sequences print2byte() writes before */
/* (KI_*) and after (KO_*) a two-byte character, one pair per JIS variant. */
#define KI_NEW "$B"
#define KO_NEW "(J"
#define KI_OLD "$@"
#define KO_OLD "(J"
#define KI_NEC "K"
#define KO_NEC "H"
/*
 * Read one line from standard input into buf (capacity `size` bytes) and
 * strip the trailing newline. Input longer than the buffer is truncated and
 * the rest of the line is discarded so it cannot leak into the next prompt.
 * On end-of-file or read error buf becomes the empty string.
 */
static void read_line(char *buf, int size)
{
    int i;
    int c;

    if (fgets(buf, size, stdin) == NULL) {
        buf[0] = '\0';
        return;
    }
    for (i = 0; buf[i] != '\0'; i++) {
        if (buf[i] == '\n') {
            buf[i] = '\0';
            return;
        }
    }
    /* No newline was stored: the line was too long (or input ended). */
    while (((c = getchar()) != EOF) && (c != '\n'))
        ;
}

/*
 * Program entry point. Prompts for the input file, detects its KANJI code,
 * prompts for the output file and the numeric style (8, 10 or 16), then
 * writes the value table with the reader that matches the detected code.
 * Returns 0 on success and 1 on any failure.
 */
int main(void)
{
    FILE *in,*out;
    int code,choice;
    char infilename[80],outfilename[80];

    printf("\nInfile name -> ");
    /* Bounded read: gets() could overflow the 80-byte buffer. */
    read_line(infilename,(int)sizeof infilename);
    if ((in = fopen(infilename,"r")) == NULL) {
        printf("\nCannot open %s",infilename);
        return 1;
    }
    /* DetectCodeType() closes the stream it is given, so the file is */
    /* opened a second time below for the real pass. */
    if ((code = DetectCodeType(in)) == NOT_SET) {
        printf("\nNo KANJI code detected in %s",infilename);
        return 1;
    }
    if ((in = fopen(infilename,"r")) == NULL) {
        printf("\nCannot open %s",infilename);
        return 1;
    }
    printf("Outfile name -> ");
    read_line(outfilename,(int)sizeof outfilename);
    if ((out = fopen(outfilename,"w")) == NULL) {
        printf("\nCannot open %s",outfilename);
        fclose(in);
        return 1;
    }
    printf("Output (8 = octal, 10 = decimal, 16 = hexadecimal) -> ");
    /* A failed conversion leaves choice unset, so test scanf's result first. */
    if ((scanf("%d",&choice) != 1) ||
        ((choice != OCT) && (choice != DEC) && (choice != HEX))) {
        printf("\nInvalid choice! Bye!");
        fclose(out);
        fclose(in);
        return 1;
    }
    Introduction(out,choice,code);
    switch (code) {
    case SJIS :
        TreatSJIS(in,out,code,choice);
        break;
    case EUC :
        TreatEUC(in,out,code,choice);
        break;
    case NEW :
    case OLD :
    case NEC :
        TreatJIS(in,out,code,choice);
        break;
    default :
        break;
    }
    fclose(out);
    fclose(in);
    return 0;
}
/*
 * Scan the stream for the first byte pattern that identifies a KANJI code
 * and return NEW, OLD, NEC, SJIS or EUC, or NOT_SET when the end of the
 * stream is reached without a match. The stream is closed before returning.
 */
int DetectCodeType(FILE *in)
{
    int detected = NOT_SET;
    int lead,next;

    while ((detected == NOT_SET) && ((lead = getc(in)) != EOF)) {
        if (lead == ESC) {
            /* 7-bit JIS: the bytes after ESC name the variant */
            next = getc(in);
            if (next == 'K') {
                detected = NEC;
            } else if (next == '$') {
                switch (getc(in)) {
                case 'B' :
                    detected = NEW;
                    break;
                case '@' :
                    detected = OLD;
                    break;
                default :
                    break;
                }
            }
        } else if ((lead >= 129) && (lead <= 254)) {
            /* 8-bit lead byte: the following byte decides SJIS vs. EUC */
            next = getc(in);
            if (lead <= 159) {
                if ((next >= 64) && (next <= 160))
                    detected = SJIS;
            } else if (lead >= 161) {
                if ((next >= 161) && (next <= 254))
                    detected = EUC;
            }
        }
    }
    fclose(in);
    return detected;
}
/* Return 1 when number is odd (negative values included), otherwise 0. */
int isodd(int number)
{
    return (number % 2) != 0;
}
/*
 * Write the report header to out: one line naming the numeric style chosen
 * (choice is OCT, DEC or HEX), one line naming the KANJI code of the output
 * (code is SJIS, EUC, NEW, OLD or NEC), then the two column-title lines.
 * An unrecognized choice or code simply produces no line for that part.
 */
void Introduction(FILE *out,int choice,int code)
{
    if (choice == OCT)
        fputs("Character values (in octal):\n\n",out);
    else if (choice == DEC)
        fputs("Character values (in decimal):\n\n",out);
    else if (choice == HEX)
        fputs("Character values (in hexadecimal):\n\n",out);

    if (code == SJIS)
        fputs("Output KANJI code will be SHIFT-JIS\n\n",out);
    else if (code == EUC)
        fputs("Output KANJI code will be EUC\n\n",out);
    else if (code == NEW)
        fputs("Output KANJI code will be JIS 7-bit (NEW-JIS)\n\n",out);
    else if (code == OLD)
        fputs("Output KANJI code will be JIS 7-bit (OLD-JIS)\n\n",out);
    else if (code == NEC)
        fputs("Output KANJI code will be JIS 7-bit (NEC-JIS)\n\n",out);

    fputs("CHARACTER\tSHIFT-JIS or\tEUC\tJIS\tASCII\tKUTEN\n",out);
    fputs("\tsingle-byte\n\n",out);
}
/*
 * Write one row for a single-byte character: the character itself, a tab,
 * and its value in the style selected by choice. Octal and decimal values
 * are zero-padded to three digits; hexadecimal is not padded. Nothing is
 * written when choice is not OCT, DEC or HEX.
 */
void print1byte(FILE *out,int choice,int one)
{
    if (choice == OCT) {
        fprintf(out,"%c\t%03o\n",one,one);
    } else if (choice == DEC) {
        fprintf(out,"%c\t%03d\n",one,one);
    } else if (choice == HEX) {
        fprintf(out,"%c\t%X\n",one,one);
    }
}
/*
 * Write one row for a two-byte character.
 *   code   - KANJI code of the output; for NEW, OLD and NEC the raw bytes
 *            are wrapped in that variant's escape sequences, otherwise they
 *            are written as they are
 *   choice - OCT, DEC or HEX; selects how the value columns are formatted
 *   one,two- the character's two bytes as read from the input
 *   data   - values indexed by SJIS1..KT2
 * The row holds the character, the SHIFT-JIS, EUC and JIS byte pairs, the
 * two JIS bytes printed as characters, and the KUTEN pair.
 */
void print2byte(FILE *out,int code,int choice,int one,int two,int data[8])
{
    const char *kanji_in = NULL;
    const char *kanji_out = NULL;

    if (code == NEW) {
        kanji_in = KI_NEW;
        kanji_out = KO_NEW;
    } else if (code == OLD) {
        kanji_in = KI_OLD;
        kanji_out = KO_OLD;
    } else if (code == NEC) {
        kanji_in = KI_NEC;
        kanji_out = KO_NEC;
    }

    /* Character column */
    if (kanji_in != NULL)
        fprintf(out,"%c%s%c%c%c%s\t",ESC,kanji_in,one,two,ESC,kanji_out);
    else
        fprintf(out,"%c%c\t",one,two);

    /* SHIFT-JIS, EUC and JIS value columns */
    if (choice == OCT) {
        fprintf(out,"%03o-%03o\t",data[SJIS1],data[SJIS2]);
        fprintf(out,"%03o-%03o\t",data[EUC1],data[EUC2]);
        fprintf(out,"%03o-%03o\t",data[JIS1],data[JIS2]);
    } else if (choice == DEC) {
        fprintf(out,"%03d-%03d\t",data[SJIS1],data[SJIS2]);
        fprintf(out,"%03d-%03d\t",data[EUC1],data[EUC2]);
        fprintf(out,"%03d-%03d\t",data[JIS1],data[JIS2]);
    } else if (choice == HEX) {
        fprintf(out,"%X-%X\t",data[SJIS1],data[SJIS2]);
        fprintf(out,"%X-%X\t",data[EUC1],data[EUC2]);
        fprintf(out,"%X-%X\t",data[JIS1],data[JIS2]);
    }

    /* ASCII column (the JIS bytes as characters) and KUTEN column */
    fprintf(out,"%c%c\t",data[JIS1],data[JIS2]);
    fprintf(out,"%02d-%02d\n",data[KT1],data[KT2]);
}
/*
 * Convert a 7-bit JIS byte pair to SHIFT-JIS in place.
 *   p1 - first byte; rewritten only when it lies in 33..126
 *   p2 - second byte; always offset according to the parity of *p1
 */
void seven2shift (int *p1,int *p2)
{
    int base;

    /* Second byte: an odd first byte selects the lower block, an even */
    /* first byte the upper one; the lower block then steps over 127. */
    *p2 += isodd(*p1) ? 31 : 126;
    if ((*p2 >= 127) && (*p2 < 158))
        ++*p2;

    /* First byte: two JIS values share one SHIFT-JIS value */
    if ((*p1 < 33) || (*p1 > 126))
        return;
    base = (*p1 <= 94) ? 112 : 176;
    if (isodd(*p1))
        *p1 = ((*p1 - 1) / 2) + base + 1;
    else
        *p1 = (*p1 / 2) + base;
}
/*
 * Convert a SHIFT-JIS byte pair to 7-bit JIS in place.
 *   p1 - first byte; rewritten only when it lies in 129..159 or 224..239
 *        and the second byte lies in 64..252
 *   p2 - second byte; rewritten only when it lies in 64..252
 * Values outside those ranges are left untouched.
 */
void shift2seven(int *p1,int *p2)
{
    const int trail = *p2;      /* second byte as it was on entry */
    const int lower = (trail >= 64) && (trail <= 158);
    const int upper = (trail >= 159) && (trail <= 252);
    int base;

    if (lower) {
        /* values above 127 sit one higher because 127 is stepped over */
        *p2 -= (trail > 127) ? 32 : 31;
    } else if (upper) {
        *p2 -= 126;
    } else {
        return;
    }

    if ((*p1 >= 129) && (*p1 <= 159))
        base = 112;
    else if ((*p1 >= 224) && (*p1 <= 239))
        base = 176;
    else
        return;

    /* a lower second byte yields the odd JIS first byte, an upper one the even */
    if (lower)
        *p1 = ((*p1 - (base + 1)) * 2) + 1;
    else
        *p1 = (*p1 - base) * 2;
}
/*
 * Finish reading an escape sequence whose ESC and following byte (temp)
 * have already been consumed, and record the resulting mode.
 *   '$' - one more byte is read and discarded; *shifted_in becomes TRUE
 *   '(' - one more byte is read and discarded; *shifted_in becomes FALSE
 *   'K' - nothing more is read; *shifted_in becomes TRUE
 *   any other value - nothing more is read; *shifted_in becomes FALSE
 */
void Skip_ESC_Seq(FILE *in,int temp,int *shifted_in)
{
    switch (temp) {
    case '$' :
        (void)getc(in);
        *shifted_in = TRUE;
        break;
    case '(' :
        (void)getc(in);
        *shifted_in = FALSE;
        break;
    case 'K' :
        *shifted_in = TRUE;
        break;
    default :
        *shifted_in = FALSE;
        break;
    }
}
/*
 * Read an EUC input stream to its end and write one table row per character.
 *   in     - input stream positioned at the start of the text
 *   out    - output stream for the table rows
 *   code   - detected KANJI code, passed through to print2byte()
 *   choice - OCT, DEC or HEX, passed through to the print routines
 * A first byte in 161..254 starts a two-byte character; bytes in 33..126
 * are printed as single-byte characters; every other byte is skipped.
 */
void TreatEUC(FILE *in,FILE *out,int code,int choice)
{
    int one,two;
    int data[8];

    while ((one = getc(in)) != EOF) {
        if ((one >= 161) && (one <= 254)) {
            /* A file cut off in the middle of a character has no second */
            /* byte; stop rather than print a row built from EOF. */
            if ((two = getc(in)) == EOF)
                break;
            /* JIS is EUC with the high bit cleared on both bytes */
            data[JIS1] = one - 128;
            data[JIS2] = two - 128;
            /* SHIFT-JIS is derived from the JIS pair */
            data[SJIS1] = data[JIS1];
            data[SJIS2] = data[JIS2];
            seven2shift(&data[SJIS1],&data[SJIS2]);
            data[EUC1] = one;
            data[EUC2] = two;
            /* KUTEN is the EUC pair less 160 */
            data[KT1] = one - 160;
            data[KT2] = two - 160;
            print2byte(out,code,choice,one,two,data);
        }
        else if ((one >= 33) && (one <= 126))
            print1byte(out,choice,one);
    }
}
/*
 * Read a 7-bit JIS input stream to its end and write one table row per
 * character.
 *   in     - input stream positioned at the start of the text
 *   out    - output stream for the table rows
 *   code   - detected JIS variant (NEW, OLD or NEC), passed to print2byte()
 *   choice - OCT, DEC or HEX, passed through to the print routines
 * Escape sequences switch between single-byte and two-byte mode. Only bytes
 * in 33..126 are treated as characters; all others are skipped in both modes.
 */
void TreatJIS(FILE *in,FILE *out,int code,int choice)
{
    int shifted_in = FALSE;
    int temp,one,two;
    int data[8];

    while ((one = getc(in)) != EOF) {
        if (one == ESC) {
            if ((temp = getc(in)) == EOF)
                break;
            Skip_ESC_Seq(in,temp,&shifted_in);
            /* Go back to the top of the loop so that the next byte gets */
            /* the full treatment: it may be another ESC, or the end of */
            /* the file (a JIS file normally ends right after shifting */
            /* out, which is not an error). */
            continue;
        }
        /* Control bytes such as a newline are not characters; taking one */
        /* as a first byte in two-byte mode would misalign every pair */
        /* that follows. */
        if ((one < 33) || (one > 126))
            continue;
        if (!shifted_in) {
            print1byte(out,choice,one);
            continue;
        }
        if ((two = getc(in)) == EOF)
            break;      /* file cut off in the middle of a character */
        if ((two < 33) || (two > 126)) {
            /* Not a valid second byte: drop the stray first byte and */
            /* let the loop look at this byte again (it may be an ESC). */
            ungetc(two,in);
            continue;
        }
        data[SJIS1] = data[EUC1] = data[JIS1] = data[KT1] = one;
        data[SJIS2] = data[EUC2] = data[JIS2] = data[KT2] = two;
        seven2shift(&data[SJIS1],&data[SJIS2]);
        /* EUC is JIS with the high bit set; KUTEN is JIS less 32 */
        data[EUC1] += 128;
        data[EUC2] += 128;
        data[KT1] -= 32;
        data[KT2] -= 32;
        print2byte(out,code,choice,one,two,data);
    }
}
/*
 * Read a SHIFT-JIS input stream to its end and write one table row per
 * character.
 *   in     - input stream positioned at the start of the text
 *   out    - output stream for the table rows
 *   code   - detected KANJI code, passed through to print2byte()
 *   choice - OCT, DEC or HEX, passed through to the print routines
 * A first byte in 129..159 or 224..239 starts a two-byte character. Bytes
 * in 33..126 and in 161..223 (half-size KATAKANA) are printed as
 * single-byte characters; every other byte is skipped.
 */
void TreatSJIS(FILE *in,FILE *out,int code,int choice)
{
    int one,two;
    int jis1,jis2;
    int data[8];

    while ((one = getc(in)) != EOF) {
        if (((one >= 129) && (one <= 159)) || ((one >= 224) && (one <= 239))) {
            /* A file cut off in the middle of a character has no second */
            /* byte; stop rather than print a row built from EOF. */
            if ((two = getc(in)) == EOF)
                break;
            /* Convert to JIS once; EUC and KUTEN are fixed offsets from it */
            jis1 = one;
            jis2 = two;
            shift2seven(&jis1,&jis2);
            data[SJIS1] = one;
            data[SJIS2] = two;
            data[JIS1] = jis1;
            data[JIS2] = jis2;
            data[EUC1] = jis1 + 128;
            data[EUC2] = jis2 + 128;
            data[KT1] = jis1 - 32;
            data[KT2] = jis2 - 32;
            print2byte(out,code,choice,one,two,data);
        }
        else if (((one >= 33) && (one <= 126)) || ((one >= 161) && (one <= 223)))
            print1byte(out,choice,one);
    }
}
More information about the Alt.sources
mailing list