[sci.lang.japan] Latest version of VALUES.C (with automatic KANJI code detection)

Ken R. Lunde KLUNDE at VMS.MACC.WISC.EDU
Tue Oct 16 08:14:27 AEST 1990


Archive-name: kanji-values/13-Oct-90
Original-posting-by: KLUNDE at VMS.MACC.WISC.EDU ("Ken R. Lunde")
Original-subject: Latest version of VALUES.C (with automatic KANJI code detection)
Reposted-by: emv at math.lsa.umich.edu (Edward Vielmetti)

[Reposted from sci.lang.japan.
Comments on this service to emv at math.lsa.umich.edu (Edward Vielmetti).]

/* VALUES.C version of 14 October 1990 */
/* A utility for displaying the values of Japanese characters. */
/* Written by Ken R. Lunde, University of Wisconsin-Madison */
/* EMAIL: klunde at vms.macc.wisc.edu */
/* Available at the ucdavis.edu (128.120.2.1) FTP archive in pub/JIS/C. */

/* I do not consider myself to be a very advanced programmer, but perhaps one */
/* other person may have a use for this program. Please feel free to use this */
/* source code any way you wish. The conversion algorithms for the major codes */
/* for Japanese are used, and are very reusable.  The algorithm which detects */
/* the input file's Japanese code automatically is also quite useful. */

/* This program was written as a tool for determining the values for Japanese */
/* and ASCII characters.  It is written in ANSI C, so should be compilable on */
/* almost any platform, but I do not offer any guarantees. :-) */

/* This version accepts SHIFT-JIS, EUC, or the 7-bit JIS codes as valid input */
/* for the file it reads. This program automatically detects which KANJI code */
/* is used in the input file. The output file will use the same code that the */
/* input file used. */

/* This program creates a file containing the contents of the input file, and */
/* displays each character's SHIFT-JIS,  EUC,  and JIS values in one of three */
/* different styles:  octal, decimal, or hexadecimal -- the user must specify */
/* which one to use. ASCII and KUTEN values are also given.   A tab separates */
/* the fields in the output file. I find that a tab width of 14 characters is */
/* best when printing.  The SJIS, EUC, and JIS columns are padded  with zeros */
/* for octal and decimal output. This makes the output more "readable." */

/* For SHIFT-JIS input files only, half-size KATAKANA are treated. Only their */
/* ASCII value is displayed since they are single-byte characters.  Printable */
/* ASCII characters are handled with all the Japanese codes. */

/* Please send comments and suggestions! ENJOY! */

#include <stdio.h>

/* Forward declarations for the functions used in this file.  fclose and exit */
/* are standard library routines that are declared by hand here. */
int DetectCodeType(FILE *in);
int fclose(FILE *fp);
int isodd(int number);
void exit(int data);
/* Output helpers: file heading, one-byte rows and two-byte rows. */
void Introduction(FILE *out,int choice,int code);
void print1byte(FILE *out,int choice,int one);
void print2byte(FILE *out,int code,int choice,int one,int two,int data[8]);
/* In-place conversion of a byte pair between 7-bit JIS and SHIFT-JIS. */
void seven2shift(int *ptr1,int *ptr2);
void shift2seven(int *ptr1,int *ptr2);
/* Per-code input loops; main calls one of them according to the detected code. */
void Skip_ESC_Seq(FILE *in,int data,int *ptr);
void TreatEUC(FILE *in,FILE *out,int code,int choice);
void TreatJIS(FILE *in,FILE *out,int code,int choice);
void TreatSJIS(FILE *in,FILE *out,int code,int choice);

#define NOT_SET       0
#define NEW           1
#define OLD           2
#define NEC           3
#define EUC           4
#define SJIS          5
#define TRUE          1
#define FALSE         0
#define ESC          27
#define SJIS1         0
#define SJIS2         1
#define EUC1          2
#define EUC2          3
#define JIS1          4
#define JIS2          5
#define KT1           6
#define KT2           7
#define OCT           8
#define DEC          10
#define HEX          16
#define KI_NEW     "$B"
#define KO_NEW     "(J"
#define KI_OLD     "$@"
#define KO_OLD     "(J"
#define KI_NEC      "K"
#define KO_NEC      "H"

/* Reads one line from stdin into buf (at most size-1 characters are kept) */
/* and removes the trailing newline.  Input beyond the buffer size is read */
/* and discarded so that it cannot leak into the next prompt. */
/* Returns 1 on success, 0 on end-of-file or read error. */
static int read_line(char *buf,int size)
{
  int i,c;

  if (fgets(buf,size,stdin) == NULL)
    return 0;
  for (i = 0; buf[i] != '\0'; i++) {
    if (buf[i] == '\n') {
      buf[i] = '\0';
      return 1;
    }
  }
  /* No newline was stored: the line was too long (or input ended). */
  while (((c = getchar()) != EOF) && (c != '\n'))
    ;
  return 1;
}

/* Asks for the input file, the output file and the output radix, detects */
/* the KANJI code of the input file, and writes the table of values. */
/* Returns 0 on success and 1 on any error. */
int main(void)
{
  FILE *in,*out;
  int code,choice;
  char infilename[80],outfilename[80];

  printf("\nInfile name  -> ");
  if (!read_line(infilename,(int)sizeof infilename)) {
    printf("\nCannot read the infile name");
    return 1;
  }
  if ((in = fopen(infilename,"r")) == NULL) {
    printf("\nCannot open %s",infilename);
    return 1;
  }
  /* DetectCodeType closes the stream it is given, so the file has to be */
  /* opened a second time for the real pass. */
  if ((code = DetectCodeType(in)) == NOT_SET) {
    printf("\nNo KANJI code detected in %s",infilename);
    return 1;
  }
  if ((in = fopen(infilename,"r")) == NULL) {
    printf("\nCannot open %s",infilename);
    return 1;
  }
  printf("Outfile name -> ");
  if (!read_line(outfilename,(int)sizeof outfilename)) {
    printf("\nCannot read the outfile name");
    fclose(in);
    return 1;
  }
  if ((out = fopen(outfilename,"w")) == NULL) {
    printf("\nCannot open %s",outfilename);
    fclose(in);
    return 1;
  }
  printf("Output (8 = octal, 10 = decimal, 16 = hexadecimal) -> ");
  /* A failed conversion leaves choice unset, so it is rejected as well. */
  if ((scanf("%d",&choice) != 1) ||
      ((choice != OCT) && (choice != DEC) && (choice != HEX))) {
    printf("\nInvalid choice! Bye!");
    fclose(out);
    fclose(in);
    return 1;
  }
  Introduction(out,choice,code);
  switch (code) {
    case SJIS :
      TreatSJIS(in,out,code,choice);
      break;
    case EUC :
      TreatEUC(in,out,code,choice);
      break;
    case NEW :
    case OLD :
    case NEC :
      TreatJIS(in,out,code,choice);
      break;
  }
  fclose(in);
  /* fclose flushes the buffered output, so a write error shows up here. */
  if (fclose(out) == EOF) {
    printf("\nCannot write %s",outfilename);
    return 1;
  }
  return 0;
}

/* Scans the stream for the first byte sequence that identifies a KANJI code */
/* and returns NEW, OLD, NEC, SJIS or EUC, or NOT_SET when none was found. */
/*   ESC $ B -> NEW     ESC $ @ -> OLD     ESC K -> NEC */
/*   lead 129-159 or 224-239 with trail  64-160 -> SJIS */
/*   lead 161-254            with trail 161-254 -> EUC */
/* The stream is always closed before returning; the caller has to open the */
/* file again in order to read it. */
int DetectCodeType(FILE *in)
{
  int p1,p2,p3,whatcode;

  whatcode = NOT_SET;
  /* Test whatcode first so that nothing more is read once a code is known. */
  while ((whatcode == NOT_SET) && ((p1 = getc(in)) != EOF)) {
    if (p1 == ESC) {
      p2 = getc(in);
      if (p2 == '$') {
        p3 = getc(in);
        if (p3 == 'B')
          whatcode = NEW;
        else if (p3 == '@')
          whatcode = OLD;
      }
      else if (p2 == 'K')
        whatcode = NEC;
    }
    else if ((p1 >= 129) && (p1 <= 254)) {
      p2 = getc(in);
      /* A trail byte below 161 cannot occur in EUC, so such a pair is */
      /* SHIFT-JIS for both lead byte ranges that TreatSJIS accepts. */
      if ((((p1 >= 129) && (p1 <= 159)) || ((p1 >= 224) && (p1 <= 239))) &&
          ((p2 >= 64) && (p2 <= 160)))
        whatcode = SJIS;
      else if (((p1 >= 161) && (p1 <= 254)) && ((p2 >= 161) && (p2 <= 254)))
        whatcode = EUC;
    }
  }
  fclose(in);
  return whatcode;
}

/* Returns 1 when number is odd (negative odd numbers included), else 0. */
int isodd(int number)
{
  if ((number % 2) != 0)
    return 1;
  return 0;
}

/* Writes the heading of the output file: the radix in use, the KANJI code */
/* of the output, and the names of the tab-separated columns. */
/* A choice or code that is not one of the known values adds no line. */
void Introduction(FILE *out,int choice,int code)
{
  const char *radix_line = NULL;
  const char *code_line = NULL;

  if (choice == OCT)
    radix_line = "Character values (in octal):\n\n";
  else if (choice == DEC)
    radix_line = "Character values (in decimal):\n\n";
  else if (choice == HEX)
    radix_line = "Character values (in hexadecimal):\n\n";
  if (radix_line != NULL)
    fputs(radix_line,out);

  if (code == SJIS)
    code_line = "Output KANJI code will be SHIFT-JIS\n\n";
  else if (code == EUC)
    code_line = "Output KANJI code will be EUC\n\n";
  else if (code == NEW)
    code_line = "Output KANJI code will be JIS 7-bit (NEW-JIS)\n\n";
  else if (code == OLD)
    code_line = "Output KANJI code will be JIS 7-bit (OLD-JIS)\n\n";
  else if (code == NEC)
    code_line = "Output KANJI code will be JIS 7-bit (NEC-JIS)\n\n";
  if (code_line != NULL)
    fputs(code_line,out);

  fputs("CHARACTER\tSHIFT-JIS or\tEUC\tJIS\tASCII\tKUTEN\n",out);
  fputs("\tsingle-byte\n\n",out);
}

/* Writes one row for a single-byte character: the character itself, a tab, */
/* and its value in the radix given by choice (octal and decimal are padded */
/* with zeros to three digits).  Any other choice writes nothing. */
void print1byte(FILE *out,int choice,int one)
{
  if (choice == OCT)
    fprintf(out,"%c\t%03o\n",one,one);
  else if (choice == DEC)
    fprintf(out,"%c\t%03d\n",one,one);
  else if (choice == HEX)
    fprintf(out,"%c\t%X\n",one,one);
}

/* Writes one row for a two-byte character. */
/*   out    - output file */
/*   code   - KANJI code of the output; for NEW, OLD and NEC the character */
/*            is wrapped in the matching escape sequences */
/*   choice - OCT, DEC or HEX; any other value leaves the three value */
/*            columns out */
/*   one    - first byte of the character as read from the input */
/*   two    - second byte of the character as read from the input */
/*   data   - the values to show, indexed by SJIS1 ... KT2 */
/* The columns are: character, SHIFT-JIS, EUC, JIS, the JIS bytes written as */
/* characters, and KUTEN (always decimal). */
void print2byte(FILE *out,int code,int choice,int one,int two,int data[8])
{
  static const int first[3] = { SJIS1, EUC1, JIS1 };
  static const int second[3] = { SJIS2, EUC2, JIS2 };
  const char *kanji_in = NULL;
  const char *kanji_out = NULL;
  int i;

  if (code == NEW) {
    kanji_in = KI_NEW;
    kanji_out = KO_NEW;
  }
  else if (code == OLD) {
    kanji_in = KI_OLD;
    kanji_out = KO_OLD;
  }
  else if (code == NEC) {
    kanji_in = KI_NEC;
    kanji_out = KO_NEC;
  }

  /* Only the 7-bit JIS codes need escape sequences around the character. */
  if (kanji_in != NULL)
    fprintf(out,"%c%s%c%c%c%s\t",ESC,kanji_in,one,two,ESC,kanji_out);
  else
    fprintf(out,"%c%c\t",one,two);

  /* SHIFT-JIS, EUC and JIS columns, in that order. */
  for (i = 0; i < 3; i++) {
    if (choice == OCT)
      fprintf(out,"%03o-%03o\t",data[first[i]],data[second[i]]);
    else if (choice == DEC)
      fprintf(out,"%03d-%03d\t",data[first[i]],data[second[i]]);
    else if (choice == HEX)
      fprintf(out,"%X-%X\t",data[first[i]],data[second[i]]);
  }

  fprintf(out,"%c%c\t",data[JIS1],data[JIS2]);
  fprintf(out,"%02d-%02d\n",data[KT1],data[KT2]);
}

/* Converts a 7-bit JIS byte pair to SHIFT-JIS in place. */
/*   p1 - first byte (33-126 for a valid pair) */
/*   p2 - second byte */
/* The second byte is adjusted first because the amount depends on whether */
/* the original first byte is odd.  A first byte outside 33-126 is left */
/* unchanged. */
void seven2shift (int *p1,int *p2)
{
  int cell,row,base;

  cell = *p2 + (isodd(*p1) ? 31 : 126);
  /* Move up by one over the range 127-157, which skips the value 127. */
  if ((cell >= 127) && (cell < 158))
    cell++;
  *p2 = cell;

  row = *p1;
  if ((row >= 33) && (row <= 94))
    base = 112;
  else if ((row >= 95) && (row <= 126))
    base = 176;
  else
    return;
  /* Two JIS rows share one SHIFT-JIS first byte; odd rows round upwards. */
  *p1 = (row / 2) + base + isodd(row);
}

/* Converts a SHIFT-JIS byte pair to 7-bit JIS in place. */
/*   p1 - first byte (129-159 or 224-239 for a valid pair) */
/*   p2 - second byte (64-252 for a valid pair) */
/* A second byte of 64-158 selects the odd JIS row of the pair of rows that */
/* share the first byte, 159-252 selects the even row.  Bytes outside the */
/* ranges above are left unchanged. */
void shift2seven(int *p1,int *p2)
{
  int lead,base;
  const int trail = *p2;
  const int odd_row = (trail >= 64) && (trail <= 158);
  const int even_row = (trail >= 159) && (trail <= 252);

  if (odd_row)
    *p2 = trail - ((trail > 127) ? 32 : 31);  /* the value 127 is skipped */
  else if (even_row)
    *p2 = trail - 126;

  lead = *p1;
  if ((lead >= 129) && (lead <= 159))
    base = 112;
  else if ((lead >= 224) && (lead <= 239))
    base = 176;
  else
    return;
  if (odd_row)
    *p1 = ((lead - base) * 2) - 1;
  else if (even_row)
    *p1 = (lead - base) * 2;
}

/* Handles the remainder of an escape sequence whose ESC has been read. */
/*   in         - input file, positioned after the character in temp */
/*   temp       - the character that followed ESC */
/*   shifted_in - set to TRUE when temp is '$' or 'K', FALSE otherwise */
/* For '$' and '(' one further character is read from in and discarded. */
void Skip_ESC_Seq(FILE *in,int temp,int *shifted_in)
{
  switch (temp) {
    case '$' :
      (void)getc(in);
      *shifted_in = TRUE;
      break;
    case 'K' :
      *shifted_in = TRUE;
      break;
    case '(' :
      (void)getc(in);
      *shifted_in = FALSE;
      break;
    default :
      *shifted_in = FALSE;
      break;
  }
}

/* Reads an EUC file and writes one row per character. */
/*   in     - input file */
/*   out    - output file */
/*   code   - passed on to print2byte */
/*   choice - OCT, DEC or HEX */
/* Two-byte characters (both bytes in 161-254) are shown with their */
/* SHIFT-JIS, EUC, JIS and KUTEN values; printable ASCII characters (33-126) */
/* are shown with their single value.  All other bytes are skipped. */
void TreatEUC(FILE *in,FILE *out,int code,int choice)
{
  int one,two;
  int data[8];

  while ((one = getc(in)) != EOF) {
    if ((one >= 161) && (one <= 254)) {
      two = getc(in);
      /* A missing or impossible second byte means that "one" was not the */
      /* start of a character: drop it and look at the next byte again. */
      if ((two < 161) || (two > 254)) {
        if (two != EOF)
          ungetc(two,in);
        continue;
      }
      data[SJIS1] = data[EUC1] = data[JIS1] = data[KT1] = one;
      data[SJIS2] = data[EUC2] = data[JIS2] = data[KT2] = two;
      /* EUC is JIS with the high bit set, so remove it before converting. */
      data[SJIS1] -= 128;
      data[SJIS2] -= 128;
      seven2shift(&data[SJIS1],&data[SJIS2]);
      data[JIS1] -= 128;
      data[JIS2] -= 128;
      data[KT1] -= 160;
      data[KT2] -= 160;
      print2byte(out,code,choice,one,two,data);
    }
    else if ((one >= 33) && (one <= 126))
      print1byte(out,choice,one);
  }
}

/* Reads a 7-bit JIS file and writes one row per character. */
/*   in     - input file */
/*   out    - output file */
/*   code   - NEW, OLD or NEC; passed on to print2byte */
/*   choice - OCT, DEC or HEX */
/* Escape sequences switch between two-byte and one-byte mode.  In two-byte */
/* mode every pair of bytes in 33-126 is shown with its SHIFT-JIS, EUC, JIS */
/* and KUTEN values; in one-byte mode printable ASCII characters (33-126) */
/* are shown with their single value.  All other bytes are skipped. */
void TreatJIS(FILE *in,FILE *out,int code,int choice)
{
  int shifted_in,temp,one,two;
  int data[8];

  shifted_in = FALSE;
  while ((one = getc(in)) != EOF) {
    if (one == ESC) {
      temp = getc(in);
      Skip_ESC_Seq(in,temp,&shifted_in);
      /* Start over with the next byte: it may be another ESC, and a file */
      /* that ends right after an escape sequence is not an error. */
      continue;
    }
    if (shifted_in) {
      /* Line ends and other control characters are not part of a pair. */
      if ((one < 33) || (one > 126))
        continue;
      two = getc(in);
      /* A missing or impossible second byte: drop "one" and look at the */
      /* next byte again (it may be an ESC). */
      if ((two < 33) || (two > 126)) {
        if (two != EOF)
          ungetc(two,in);
        continue;
      }
      data[SJIS1] = data[EUC1] = data[JIS1] = data[KT1] = one;
      data[SJIS2] = data[EUC2] = data[JIS2] = data[KT2] = two;
      seven2shift(&data[SJIS1],&data[SJIS2]);
      data[EUC1] += 128;
      data[EUC2] += 128;
      data[KT1] -= 32;
      data[KT2] -= 32;
      print2byte(out,code,choice,one,two,data);
    }
    else if ((one >= 33) && (one <= 126))
      print1byte(out,choice,one);
  }
}

/* Reads a SHIFT-JIS file and writes one row per character. */
/*   in     - input file */
/*   out    - output file */
/*   code   - passed on to print2byte */
/*   choice - OCT, DEC or HEX */
/* Two-byte characters (first byte 129-159 or 224-239, second byte 64-252 */
/* except 127) are shown with their SHIFT-JIS, EUC, JIS and KUTEN values. */
/* Printable ASCII characters (33-126) and the single bytes 161-223 are */
/* shown with their single value.  All other bytes are skipped. */
void TreatSJIS(FILE *in,FILE *out,int code,int choice)
{
  int one,two;
  int data[8];

  while ((one = getc(in)) != EOF) {
    if (((one >= 129) && (one <= 159)) || ((one >= 224) && (one <= 239))) {
      two = getc(in);
      /* A missing or impossible second byte means that "one" was not the */
      /* start of a character: drop it and look at the next byte again. */
      if ((two < 64) || (two > 252) || (two == 127)) {
        if (two != EOF)
          ungetc(two,in);
        continue;
      }
      data[SJIS1] = data[EUC1] = data[JIS1] = data[KT1] = one;
      data[SJIS2] = data[EUC2] = data[JIS2] = data[KT2] = two;
      /* EUC and KUTEN are both derived from the 7-bit JIS value. */
      shift2seven(&data[EUC1],&data[EUC2]);
      data[EUC1] += 128;
      data[EUC2] += 128;
      shift2seven(&data[JIS1],&data[JIS2]);
      shift2seven(&data[KT1],&data[KT2]);
      data[KT1] -= 32;
      data[KT2] -= 32;
      print2byte(out,code,choice,one,two,data);
    }
    else if (((one >= 33) && (one <= 126)) || ((one >= 161) && (one <= 223)))
      print1byte(out,choice,one);
  }
}



More information about the Alt.sources mailing list