/* KSC 5601 < - > ISO-2022-KR code converter for Hangul Mail Exchange
   with non-localized MTA(e.g. sendmail )

   version 1.0pl1 ( June, 8, 1996) : supersedes the one included in hmconv.tar.gz;
                                     it is now a bit more robust
                                     and works better in Emacs.

   version 1.0    ( May, 1996)

   By Jungshik Shin(jshin@minerva.cis.yale.edu)

   For details on Hangul mail exchange with this filter,
   see documents included in hmconv.tar.gz and
   references there. You may also subscribe to
   han.comp.mail and han.comp.hangul for further
   development in Hangul mail exchange including
   customization for Emacs+Rmail (.emacs setting)
   I plan to post.

*/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* isksc(c):  true when byte c lies in 0xA1..0xFE, i.e. strictly between
              octal 240 and octal 377.  The converter treats such bytes
              as KSC 5601 (8-bit) text.
   is7ksc(c): true when byte c lies in 0x21..0x7E, i.e. strictly between
              octal 040 and octal 177.
   Both macros evaluate their argument twice; do not pass an expression
   with side effects. */
#define isksc(c)   ( (unsigned char) (c) >= 0xA1 && (unsigned char) (c) <= 0xFE )
#define is7ksc(c)  ( (unsigned char) (c) >= 0x21 && (unsigned char) (c) <= 0x7E )

/* Size of the line buffer handed to fgets(). */
#define BUF 4096

/* Shift characters: SO is written before a run of KSC 5601 bytes,
   SI before going back to ASCII. */
#define SI '\x0F'
#define SO '\x0E'


/* Input and output streams.  main() points them at stdin/stdout and
   then, depending on the command line, at files opened by openfile(). */
FILE *in;
FILE *out;

/* Non-zero selects ISO-2022-KR -> KSC 5601 (option -u);
   zero selects KSC 5601 -> ISO-2022-KR. */
int isdecode;

/* Non-zero once the ESC $ ) C designator has been written (when
   encoding) or seen in the input (when decoding). */
int ishangul;

/* The text most recently read by fgets() and processed by convert(). */
char line[BUF];

int openfile(char *name, char *mode, FILE **fp);
int convert();



/* Program entry point.

   Usage: prog [-h] [-u] [input file] [output file]

   Selects the conversion direction and the input/output streams from
   the command line, then reads the input with fgets() and passes each
   buffer to convert().

   Returns 0 on success and 1 if a read or write error was detected on
   the streams.  openfile() terminates the program on its own when a
   file cannot be opened.

   Arguments beyond the ones shown above are not diagnosed: with four
   or more arguments after the program name nothing is parsed and the
   filter runs stdin -> stdout in the encoding direction. */
int main (int argc, char **argv)
{
  int status = 0;

  isdecode = 0;
  ishangul = 0;

  in = stdin;
  out = stdout;


  if ( argc > 1 && ! strcmp(argv[1],"-h") ) {
     fprintf(stderr,"Usage: %s [-h] [-u] [input file] [output file]\n",
                     argv[0]);
     fputs("\t -h : prints this help\n",stderr);
     fputs("\t -u : ISO-2022-KR -> KSC 5601\n",stderr);
     fputs("\t      without '-u', KSC 5601 -> ISO-2022-KR\n",stderr);
     fputs("\t standard input(output) is assumed when\n",stderr);
     fputs("\t input(output) file isn't specified.\n",stderr);
     return 0;
  }

  switch (argc) {
   case 4:
         if ( ! strcmp(argv[1], "-u")) {
              openfile(argv[2],"r",&in);
              openfile(argv[3],"w",&out);
              isdecode = 1;
         }
         else {
              /* argv[3] is ignored in this form */
              openfile(argv[1],"r",&in);
              openfile(argv[2],"w",&out);
         }
         break;
   case 3:
         if ( ! strcmp(argv[1], "-u")) {
              openfile(argv[2],"r",&in);
              isdecode = 1;
         }
         else {
              openfile(argv[1],"r",&in);
              openfile(argv[2],"w",&out);
         }
         break;
   case 2:
         if ( ! strcmp(argv[1], "-u")) {
              isdecode = 1;
         }
         else {
              openfile(argv[1],"r",&in);
         }
         break;
   default:
         /* no arguments (or too many): keep stdin -> stdout */
         break;
   }

   while (  fgets(line,BUF,in) != NULL )
       convert();

   /* fgets() returns NULL both at end of file and on error;
      tell the two apart before the stream is closed. */
   if ( ferror(in) ) {
      fputs("Read error !\n",stderr);
      status = 1;
   }
   fclose(in);

   /* A failed write may only show up when the buffer is flushed,
      so check the error flag and the result of fclose() as well. */
   if ( ferror(out) ) {
      status = 1;
   }
   if ( fclose(out) == EOF ) {
      status = 1;
   }
   if ( status != 0 && ! ferror(stderr) ) {
      fputs("Conversion did not complete cleanly.\n",stderr);
   }

   return status;
}

#define KSC 1
#define ASCII 0

/* True when ch ends the text held in the line buffer.
   '\n' is the normal case.  '\0' covers a buffer that fgets() filled
   completely and input from emacs, which doesn't pad the region with
   '\n' when handing it over to a filter ('shell-command-on-region).
   The buffer is never compared against EOF: fgets() does not store it,
   and with a signed char such a test would match the data byte 0xFF
   and cut the line short. */
static int is_line_end(char ch)
{
   return ch == '\n' || ch == '\0';
}

/* KSC 5601 -> ISO-2022-KR for the text in the global buffer `line`.

   Until the first KSC 5601 byte shows up, text is copied unchanged.
   When it does, the designator ESC $ ) C is written once (followed by
   '\n') and `ishangul` is set.  From then on every run of KSC 5601
   bytes is written between SO and SI with the high bit cleared. */
static void encode_line(void)
{
   int mode = ASCII;
   int i;

   if ( !ishangul ) {
      /* search for KSC 5601 character(s) in line */
      for ( i = 0; !is_line_end(line[i]); i++ ) {
         if ( isksc(line[i]) ) {
            ishangul = 1;               /* found KSC 5601 */

            /* RFC 1557 does not require '\n' after the designator
               and Hangul sendmail and cvt8.exe work fine without it,
               but 'hcode' expects '\n' and breaks a few characters
               after the designator if it's not followed by '\n' */
            fprintf(out,"\033$)C\n");   /* put out the designator */
            break;
         }
      }
   }

   if ( !ishangul ) {      /* KSC 5601 doesn't appear, yet */
      fputs(line,out);     /* no conversion */
      return;
   }

   for ( i = 0; !is_line_end(line[i]); i++ ) {
      if ( isksc(line[i]) ) {
         if ( mode == ASCII ) {         /* entering a KSC 5601 run */
            fputc(SO,out);
            mode = KSC;
         }
         fputc(0x7f & line[i],out);
      }
      else {
         if ( mode == KSC ) {           /* leaving a KSC 5601 run */
            fputc(SI,out);
            mode = ASCII;
         }
         fputc(line[i],out);
      }
   }

   /* always hand the stream back in ASCII mode */
   if ( mode == KSC )
      fputc(SI,out);

   /* Only a '\n' that was read is written back.  When the text ended
      at '\0' (buffer fill, or last line without '\n') nothing is added. */
   if ( line[i] == '\n' )
      fputc('\n',out);
}

/* ISO-2022-KR -> KSC 5601 for the text in the global buffer `line`.

   Checking the designator only at the start of a line would be more
   economical and strictly conforms to ISO-2022-KR, but some programs
   (e.g. Mule) seem to use 'ESC $ ) C' as the character-set switcher
   instead of the designator.  Hence the designator is looked for at
   every position and removed wherever it is found. */
static void decode_line(void)
{
   int mode = ASCII;
   int i = 0;

   while ( !is_line_end(line[i]) ) {

      if ( ! strncmp(&line[i],"\033$)C",4) ) {
         ishangul = 1;
         i += 4;
         if ( line[i] == '\n' && i == 4 )
                                /* remove '\n' from lines containing */
            return;             /* only the designator ESC+$)C       */
         continue;
      }

      if ( !ishangul ) {
         /* nothing designated yet: copy the byte through */
         fputc(line[i],out);
      }
      else {
         switch ( line[i] ) {

           case SO:
             mode = KSC;
             break;
           case SI:
             mode = ASCII;
             break;
           default:
             /* space and tab can be embedded among KSC 5601 and are
                copied as they are; every other byte read in KSC mode
                gets its high bit set */
             if ( mode == KSC && line[i] != '\040' && line[i] != '\011' )
                fputc(line[i] | 0x80,out);
             else
                fputc(line[i],out);
             break;
         }
      }

      i++;
   }

   if ( line[i] == '\n' )      /* a missing '\n' is not added */
      fputc(line[i],out);
}

/* Convert the text currently held in the global buffer `line` and
   write the result to `out`.  The direction is chosen by `isdecode`:
   zero means KSC 5601 -> ISO-2022-KR, non-zero the reverse.
   `ishangul` carries the "designator written/seen" state from one
   call to the next.  Always returns 0.

   NOTE(review): the SO/SI shift state is local to one call.  main()
   calls this once per fgets() buffer, so a line longer than BUF-1
   bytes is handled as several independent pieces and a KSC 5601 run
   that crosses a buffer boundary gets split -- confirm whether such
   long lines need to be supported. */
int convert(void)
{
   if ( isdecode )
      decode_line();
   else
      encode_line();

   return 0;
}

/* Open the file `name` with the fopen() mode string `mode` and store
   the resulting stream in *fp.
   Returns 0 on success.  If the file cannot be opened, a message is
   printed on stderr and the program exits with status 1. */
int openfile(char *name,char *mode,FILE **fp)
{
   FILE *stream = fopen(name,mode);

   *fp = stream;
   if ( stream == NULL ) {
      fprintf(stderr,"File %s open error !\n", name);
      exit(1);
   }

   return 0;
}
