v08i074: cz text to PostScript system, part 10 of 14
Brandon S. Allbery - comp.sources.misc
allbery at uunet.UU.NET
Mon Oct 2 00:40:11 AEST 1989
Posting-number: Volume 8, Issue 74
Submitted-by: howard at dahlbeck.ericsson.se (Howard Gayle)
Archive-name: cz/part10
#! /bin/sh
# This is a shell archive. Remove anything before this line, then feed it
# into a shell via "sh file" or similar. To overwrite existing files,
# type "sh file -c".
# The tool that generated this appeared in the comp.sources.unix newsgroup;
# send mail to comp-sources-unix at uunet.uu.net if you want that tool.
# If this archive is complete, you will see the following message at the end:
# "End of archive 10 (of 14)."
# Contents: 78seus.c
# Wrapped by howard at dahlbeck on Mon Sep 25 07:15:23 1989
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f '78seus.c' -a "${1}" != "-c" ; then
echo shar: Will not clobber existing file \"'78seus.c'\"
else
echo shar: Extracting \"'78seus.c'\" \(50344 characters\)
sed "s/^X//" >'78seus.c' <<'END_OF_FILE'
X/*
X * 78seus - convert Swedish or (US) English from ISO 646 to ISO 8859/1
X */
X
X#ifndef lint
Xstatic char _cpyrgt[] = "Copyright 1989 Howard Lee Gayle";
X#endif lint
X
X/*
X * This program is free software; you can redistribute it and/or modify
X * it under the terms of the GNU General Public License version 1,
X * as published by the Free Software Foundation.
X *
X * This program is distributed in the hope that it will be useful,
X * but WITHOUT ANY WARRANTY; without even the implied warranty of
X * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
X * GNU General Public License for more details.
X *
X * You should have received a copy of the GNU General Public License
X * along with this program; if not, write to the Free Software
X * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
X */
X
X#include <stdio.h>
X#include <howard/port.h>
X#include <howard/version.h>
X#include <howard/usage.h>
X
XMAINVER ("@(#)$Header: 78seus.c,v 1.43 89/08/28 18:39:58 howard Exp $");
XUSAGE ("[-# shar-points] [-A attack] [-B blank-line-smoothing] [-D decay] [-b body-points] [-c colon-smoothing] [-d] [-f] [-m] [-s lines] [-t threshold]");
X
X#include <ctype.h>
X#include <limits.h>
X#include <string.h>
X#include <howard/a2.h>
X#include <howard/malf.h>
X#include <howard/registers.i>
X#include "cz.h"
X#include "78.h"
X
X/*
X * byte2t - trigram code for every possible input byte, indexed by byte value.
X *
X * Codes assigned below:
X *    0..25  the letters A-Z and a-z (upper and lower case share a code)
X *    26     right square bracket, right curly bracket, A/a with ring
X *    27     left square bracket, left curly bracket, A/a with diaeresis
X *    28     reverse solidus, vertical line, O/o with diaeresis
X *    29     grave accent, small e with acute accent
X *    32     every other byte
X * seus() treats a byte as part of a word when its code is <= TRIHI.
X *
X * The comment on each entry gives the byte as high/low four bits (in
X * decimal), then in octal, decimal and hexadecimal, then its name.
X */
XPRIVATE byteT byte2t[256] = /* Map each byte to a trigram code.*/
X{
X32,/* 0/ 0 0 0 0 NUL (null) */
X32,/* 0/ 1 1 1 1 SOH (start of heading) */
X32,/* 0/ 2 2 2 2 STX (start of text) */
X32,/* 0/ 3 3 3 3 ETX (end of text) */
X32,/* 0/ 4 4 4 4 EOT (end of transmission) */
X32,/* 0/ 5 5 5 5 ENQ (enquiry) */
X32,/* 0/ 6 6 6 6 ACK (acknowledge) */
X32,/* 0/ 7 7 7 7 BEL (bell) */
X32,/* 0/ 8 10 8 8 BS (backspace) */
X32,/* 0/ 9 11 9 9 HT (horizontal tabulation) */
X32,/* 0/10 12 10 A LF (line feed) */
X32,/* 0/11 13 11 B VT (vertical tabulation) */
X32,/* 0/12 14 12 C FF (form feed) */
X32,/* 0/13 15 13 D CR (carriage return) */
X32,/* 0/14 16 14 E SO (shift out) */
X32,/* 0/15 17 15 F SI (shift in) */
X32,/* 1/ 0 20 16 10 DLE (data link escape) */
X32,/* 1/ 1 21 17 11 DC1 (device control 1) */
X32,/* 1/ 2 22 18 12 DC2 (device control 2) */
X32,/* 1/ 3 23 19 13 DC3 (device control 3) */
X32,/* 1/ 4 24 20 14 DC4 (device control 4) */
X32,/* 1/ 5 25 21 15 NAK (negative acknowledge) */
X32,/* 1/ 6 26 22 16 SYN (synchronous idle) */
X32,/* 1/ 7 27 23 17 ETB (end of transmission block) */
X32,/* 1/ 8 30 24 18 CAN (cancel) */
X32,/* 1/ 9 31 25 19 EM (end of medium) */
X32,/* 1/10 32 26 1A SUB (substitute character) */
X32,/* 1/11 33 27 1B ESC (escape) */
X32,/* 1/12 34 28 1C IS4/FS (information separator 4 / file separator) */
X32,/* 1/13 35 29 1D IS3/GS (information separator 3 / group separator) */
X32,/* 1/14 36 30 1E IS2/RS (information separator 2 / record separator)*/
X32,/* 1/15 37 31 1F IS1/US (information separator 1 / unit separator) */
X32,/* 2/ 0 40 32 20 space */
X32,/* 2/ 1 41 33 21 exclamation mark */
X32,/* 2/ 2 42 34 22 quotation mark */
X32,/* 2/ 3 43 35 23 number sign */
X32,/* 2/ 4 44 36 24 dollar sign */
X32,/* 2/ 5 45 37 25 percent sign */
X32,/* 2/ 6 46 38 26 ampersand */
X32,/* 2/ 7 47 39 27 apostrophe */
X32,/* 2/ 8 50 40 28 left parenthesis */
X32,/* 2/ 9 51 41 29 right parenthesis */
X32,/* 2/10 52 42 2A asterisk */
X32,/* 2/11 53 43 2B plus sign */
X32,/* 2/12 54 44 2C comma */
X32,/* 2/13 55 45 2D hyphen, minus sign */
X32,/* 2/14 56 46 2E full stop */
X32,/* 2/15 57 47 2F solidus */
X32,/* 3/ 0 60 48 30 digit zero */
X32,/* 3/ 1 61 49 31 digit one */
X32,/* 3/ 2 62 50 32 digit two */
X32,/* 3/ 3 63 51 33 digit three */
X32,/* 3/ 4 64 52 34 digit four */
X32,/* 3/ 5 65 53 35 digit five */
X32,/* 3/ 6 66 54 36 digit six */
X32,/* 3/ 7 67 55 37 digit seven */
X32,/* 3/ 8 70 56 38 digit eight */
X32,/* 3/ 9 71 57 39 digit nine */
X32,/* 3/10 72 58 3A colon */
X32,/* 3/11 73 59 3B semicolon */
X32,/* 3/12 74 60 3C less-than sign */
X32,/* 3/13 75 61 3D equals sign */
X32,/* 3/14 76 62 3E greater-than sign */
X32,/* 3/15 77 63 3F question mark */
X32,/* 4/ 0 100 64 40 commercial at */
X 0,/* 4/ 1 101 65 41 A */
X 1,/* 4/ 2 102 66 42 B */
X 2,/* 4/ 3 103 67 43 C */
X 3,/* 4/ 4 104 68 44 D */
X 4,/* 4/ 5 105 69 45 E */
X 5,/* 4/ 6 106 70 46 F */
X 6,/* 4/ 7 107 71 47 G */
X 7,/* 4/ 8 110 72 48 H */
X 8,/* 4/ 9 111 73 49 I */
X 9,/* 4/10 112 74 4A J */
X10,/* 4/11 113 75 4B K */
X11,/* 4/12 114 76 4C L */
X12,/* 4/13 115 77 4D M */
X13,/* 4/14 116 78 4E N */
X14,/* 4/15 117 79 4F O */
X15,/* 5/ 0 120 80 50 P */
X16,/* 5/ 1 121 81 51 Q */
X17,/* 5/ 2 122 82 52 R */
X18,/* 5/ 3 123 83 53 S */
X19,/* 5/ 4 124 84 54 T */
X20,/* 5/ 5 125 85 55 U */
X21,/* 5/ 6 126 86 56 V */
X22,/* 5/ 7 127 87 57 W */
X23,/* 5/ 8 130 88 58 X */
X24,/* 5/ 9 131 89 59 Y */
X25,/* 5/10 132 90 5A Z */
X27,/* 5/11 133 91 5B left square bracket */
X28,/* 5/12 134 92 5C reverse solidus */
X26,/* 5/13 135 93 5D right square bracket */
X32,/* 5/14 136 94 5E circumflex accent */
X32,/* 5/15 137 95 5F low line, underline */
X29,/* 6/ 0 140 96 60 grave accent */
X 0,/* 6/ 1 141 97 61 a */
X 1,/* 6/ 2 142 98 62 b */
X 2,/* 6/ 3 143 99 63 c */
X 3,/* 6/ 4 144 100 64 d */
X 4,/* 6/ 5 145 101 65 e */
X 5,/* 6/ 6 146 102 66 f */
X 6,/* 6/ 7 147 103 67 g */
X 7,/* 6/ 8 150 104 68 h */
X 8,/* 6/ 9 151 105 69 i */
X 9,/* 6/10 152 106 6A j */
X10,/* 6/11 153 107 6B k */
X11,/* 6/12 154 108 6C l */
X12,/* 6/13 155 109 6D m */
X13,/* 6/14 156 110 6E n */
X14,/* 6/15 157 111 6F o */
X15,/* 7/ 0 160 112 70 p */
X16,/* 7/ 1 161 113 71 q */
X17,/* 7/ 2 162 114 72 r */
X18,/* 7/ 3 163 115 73 s */
X19,/* 7/ 4 164 116 74 t */
X20,/* 7/ 5 165 117 75 u */
X21,/* 7/ 6 166 118 76 v */
X22,/* 7/ 7 167 119 77 w */
X23,/* 7/ 8 170 120 78 x */
X24,/* 7/ 9 171 121 79 y */
X25,/* 7/10 172 122 7A z */
X27,/* 7/11 173 123 7B left curly bracket */
X28,/* 7/12 174 124 7C vertical line */
X26,/* 7/13 175 125 7D right curly bracket */
X32,/* 7/14 176 126 7E tilde */
X32,/* 7/15 177 127 7F DEL (delete) */
X32,/* 8/ 0 200 128 80 */
X32,/* 8/ 1 201 129 81 */
X32,/* 8/ 2 202 130 82 */
X32,/* 8/ 3 203 131 83 */
X32,/* 8/ 4 204 132 84 */
X32,/* 8/ 5 205 133 85 */
X32,/* 8/ 6 206 134 86 */
X32,/* 8/ 7 207 135 87 */
X32,/* 8/ 8 210 136 88 */
X32,/* 8/ 9 211 137 89 */
X32,/* 8/10 212 138 8A */
X32,/* 8/11 213 139 8B */
X32,/* 8/12 214 140 8C */
X32,/* 8/13 215 141 8D */
X32,/* 8/14 216 142 8E */
X32,/* 8/15 217 143 8F */
X32,/* 9/ 0 220 144 90 */
X32,/* 9/ 1 221 145 91 */
X32,/* 9/ 2 222 146 92 */
X32,/* 9/ 3 223 147 93 */
X32,/* 9/ 4 224 148 94 */
X32,/* 9/ 5 225 149 95 */
X32,/* 9/ 6 226 150 96 */
X32,/* 9/ 7 227 151 97 */
X32,/* 9/ 8 230 152 98 */
X32,/* 9/ 9 231 153 99 */
X32,/* 9/10 232 154 9A */
X32,/* 9/11 233 155 9B */
X32,/* 9/12 234 156 9C */
X32,/* 9/13 235 157 9D */
X32,/* 9/14 236 158 9E */
X32,/* 9/15 237 159 9F */
X32,/*10/ 0 240 160 A0 NBSP (no-break space) */
X32,/*10/ 1 241 161 A1 inverted exclamation mark */
X32,/*10/ 2 242 162 A2 cent sign */
X32,/*10/ 3 243 163 A3 pound sign */
X32,/*10/ 4 244 164 A4 general currency sign */
X32,/*10/ 5 245 165 A5 yen sign */
X32,/*10/ 6 246 166 A6 broken vertical line */
X32,/*10/ 7 247 167 A7 section sign */
X32,/*10/ 8 250 168 A8 diaeresis */
X32,/*10/ 9 251 169 A9 copyright sign */
X32,/*10/10 252 170 AA ordinal indicator, feminine */
X32,/*10/11 253 171 AB angle quotation mark left */
X32,/*10/12 254 172 AC not sign */
X32,/*10/13 255 173 AD soft hyphen */
X32,/*10/14 256 174 AE registered sign */
X32,/*10/15 257 175 AF macron */
X32,/*11/ 0 260 176 B0 degree sign */
X32,/*11/ 1 261 177 B1 plus or minus sign */
X32,/*11/ 2 262 178 B2 superscript two */
X32,/*11/ 3 263 179 B3 superscript three */
X32,/*11/ 4 264 180 B4 acute accent */
X32,/*11/ 5 265 181 B5 micro sign */
X32,/*11/ 6 266 182 B6 pilcrow */
X32,/*11/ 7 267 183 B7 middle dot */
X32,/*11/ 8 270 184 B8 cedilla */
X32,/*11/ 9 271 185 B9 superscript one */
X32,/*11/10 272 186 BA ordinal indicator, masculine */
X32,/*11/11 273 187 BB angle quotation mark right */
X32,/*11/12 274 188 BC fraction one-quarter */
X32,/*11/13 275 189 BD fraction one-half */
X32,/*11/14 276 190 BE fraction three-quarters */
X32,/*11/15 277 191 BF inverted question mark */
X32,/*12/ 0 300 192 C0 capital A with grave accent */
X32,/*12/ 1 301 193 C1 capital A with acute accent */
X32,/*12/ 2 302 194 C2 capital A with circumflex accent */
X32,/*12/ 3 303 195 C3 capital A with tilde */
X27,/*12/ 4 304 196 C4 capital A with diaeresis or umlaut mark */
X26,/*12/ 5 305 197 C5 capital A with ring */
X32,/*12/ 6 306 198 C6 capital AE diphthong */
X32,/*12/ 7 307 199 C7 capital C with cedilla */
X32,/*12/ 8 310 200 C8 capital E with grave accent */
X32,/*12/ 9 311 201 C9 capital E with acute accent */
X32,/*12/10 312 202 CA capital E with circumflex accent */
X32,/*12/11 313 203 CB capital E with diaeresis or umlaut mark */
X32,/*12/12 314 204 CC capital I with grave accent */
X32,/*12/13 315 205 CD capital I with acute accent */
X32,/*12/14 316 206 CE capital I with circumflex accent */
X32,/*12/15 317 207 CF capital I with diaeresis or umlaut mark */
X32,/*13/ 0 320 208 D0 capital D with stroke, Icelandic eth */
X32,/*13/ 1 321 209 D1 capital N with tilde */
X32,/*13/ 2 322 210 D2 capital O with grave accent */
X32,/*13/ 3 323 211 D3 capital O with acute accent */
X32,/*13/ 4 324 212 D4 capital O with circumflex accent */
X32,/*13/ 5 325 213 D5 capital O with tilde */
X28,/*13/ 6 326 214 D6 capital O with diaeresis or umlaut mark */
X32,/*13/ 7 327 215 D7 multiplication sign */
X32,/*13/ 8 330 216 D8 capital O with slash */
X32,/*13/ 9 331 217 D9 capital U with grave accent */
X32,/*13/10 332 218 DA capital U with acute accent */
X32,/*13/11 333 219 DB capital U with circumflex accent */
X32,/*13/12 334 220 DC capital U with diaeresis or umlaut mark */
X32,/*13/13 335 221 DD capital Y with acute accent */
X32,/*13/14 336 222 DE capital thorn, Icelandic */
X32,/*13/15 337 223 DF small sharp s, German */
X32,/*14/ 0 340 224 E0 small a with grave accent */
X32,/*14/ 1 341 225 E1 small a with acute accent */
X32,/*14/ 2 342 226 E2 small a with circumflex accent */
X32,/*14/ 3 343 227 E3 small a with tilde */
X27,/*14/ 4 344 228 E4 small a with diaeresis or umlaut mark */
X26,/*14/ 5 345 229 E5 small a with ring */
X32,/*14/ 6 346 230 E6 small ae diphthong */
X32,/*14/ 7 347 231 E7 small c with cedilla */
X32,/*14/ 8 350 232 E8 small e with grave accent */
X29,/*14/ 9 351 233 E9 small e with acute accent */
X32,/*14/10 352 234 EA small e with circumflex accent */
X32,/*14/11 353 235 EB small e with diaeresis or umlaut mark */
X32,/*14/12 354 236 EC small i with grave accent */
X32,/*14/13 355 237 ED small i with acute accent */
X32,/*14/14 356 238 EE small i with circumflex accent */
X32,/*14/15 357 239 EF small i with diaeresis or umlaut mark */
X32,/*15/ 0 360 240 F0 small d with stroke, Icelandic eth */
X32,/*15/ 1 361 241 F1 small n with tilde */
X32,/*15/ 2 362 242 F2 small o with grave accent */
X32,/*15/ 3 363 243 F3 small o with acute accent */
X32,/*15/ 4 364 244 F4 small o with circumflex accent */
X32,/*15/ 5 365 245 F5 small o with tilde */
X28,/*15/ 6 366 246 F6 small o with diaeresis or umlaut mark */
X32,/*15/ 7 367 247 F7 division sign */
X32,/*15/ 8 370 248 F8 small o with slash */
X32,/*15/ 9 371 249 F9 small u with grave accent */
X32,/*15/10 372 250 FA small u with acute accent */
X32,/*15/11 373 251 FB small u with circumflex accent */
X32,/*15/12 374 252 FC small u with diaeresis or umlaut mark */
X32,/*15/13 375 253 FD small y with acute accent */
X32,/*15/14 376 254 FE small thorn, Icelandic */
X32,/*15/15 377 255 FF small y with diaeresis or umlaut mark */
X};
X
X
X/*
X * se8 - translation from Swedish ISO 646 to ISO 8859/1, indexed by input
X * byte.  Values are written in octal.  Every byte maps to itself except:
X *    0133 left square bracket   -> 0304 capital A with diaeresis
X *    0134 reverse solidus       -> 0326 capital O with diaeresis
X *    0135 right square bracket  -> 0305 capital A with ring
X *    0140 grave accent          -> 0351 small e with acute accent
X *    0173 left curly bracket    -> 0344 small a with diaeresis
X *    0174 vertical line         -> 0366 small o with diaeresis
X *    0175 right curly bracket   -> 0345 small a with ring
X * seus() applies this table to each byte of a word it decides to convert.
X *
X * The comment on each entry gives the input byte as high/low four bits
X * (in decimal), then in octal, decimal and hexadecimal, then the name of
X * that position (not of the value it is translated to).
X */
XPRIVATE byteT se8[256] = /* Map Swedish ISO 646 to ISO 8859/1.*/
X{
X0000,/* 0/ 0 0 0 0 NUL (null) */
X0001,/* 0/ 1 1 1 1 SOH (start of heading) */
X0002,/* 0/ 2 2 2 2 STX (start of text) */
X0003,/* 0/ 3 3 3 3 ETX (end of text) */
X0004,/* 0/ 4 4 4 4 EOT (end of transmission) */
X0005,/* 0/ 5 5 5 5 ENQ (enquiry) */
X0006,/* 0/ 6 6 6 6 ACK (acknowledge) */
X0007,/* 0/ 7 7 7 7 BEL (bell) */
X0010,/* 0/ 8 10 8 8 BS (backspace) */
X0011,/* 0/ 9 11 9 9 HT (horizontal tabulation) */
X0012,/* 0/10 12 10 A LF (line feed) */
X0013,/* 0/11 13 11 B VT (vertical tabulation) */
X0014,/* 0/12 14 12 C FF (form feed) */
X0015,/* 0/13 15 13 D CR (carriage return) */
X0016,/* 0/14 16 14 E SO (shift out) */
X0017,/* 0/15 17 15 F SI (shift in) */
X0020,/* 1/ 0 20 16 10 DLE (data link escape) */
X0021,/* 1/ 1 21 17 11 DC1 (device control 1) */
X0022,/* 1/ 2 22 18 12 DC2 (device control 2) */
X0023,/* 1/ 3 23 19 13 DC3 (device control 3) */
X0024,/* 1/ 4 24 20 14 DC4 (device control 4) */
X0025,/* 1/ 5 25 21 15 NAK (negative acknowledge) */
X0026,/* 1/ 6 26 22 16 SYN (synchronous idle) */
X0027,/* 1/ 7 27 23 17 ETB (end of transmission block) */
X0030,/* 1/ 8 30 24 18 CAN (cancel) */
X0031,/* 1/ 9 31 25 19 EM (end of medium) */
X0032,/* 1/10 32 26 1A SUB (substitute character) */
X0033,/* 1/11 33 27 1B ESC (escape) */
X0034,/* 1/12 34 28 1C IS4/FS (information separator 4 / file separator)*/
X0035,/* 1/13 35 29 1D IS3/GS (information separator 3 / group separator) */
X0036,/* 1/14 36 30 1E IS2/RS (information separator 2 / record separator)*/
X0037,/* 1/15 37 31 1F IS1/US (information separator 1 / unit separator)*/
X0040,/* 2/ 0 40 32 20 space */
X0041,/* 2/ 1 41 33 21 exclamation mark */
X0042,/* 2/ 2 42 34 22 quotation mark */
X0043,/* 2/ 3 43 35 23 number sign */
X0044,/* 2/ 4 44 36 24 dollar sign */
X0045,/* 2/ 5 45 37 25 percent sign */
X0046,/* 2/ 6 46 38 26 ampersand */
X0047,/* 2/ 7 47 39 27 apostrophe */
X0050,/* 2/ 8 50 40 28 left parenthesis */
X0051,/* 2/ 9 51 41 29 right parenthesis */
X0052,/* 2/10 52 42 2A asterisk */
X0053,/* 2/11 53 43 2B plus sign */
X0054,/* 2/12 54 44 2C comma */
X0055,/* 2/13 55 45 2D hyphen, minus sign */
X0056,/* 2/14 56 46 2E full stop */
X0057,/* 2/15 57 47 2F solidus */
X0060,/* 3/ 0 60 48 30 digit zero */
X0061,/* 3/ 1 61 49 31 digit one */
X0062,/* 3/ 2 62 50 32 digit two */
X0063,/* 3/ 3 63 51 33 digit three */
X0064,/* 3/ 4 64 52 34 digit four */
X0065,/* 3/ 5 65 53 35 digit five */
X0066,/* 3/ 6 66 54 36 digit six */
X0067,/* 3/ 7 67 55 37 digit seven */
X0070,/* 3/ 8 70 56 38 digit eight */
X0071,/* 3/ 9 71 57 39 digit nine */
X0072,/* 3/10 72 58 3A colon */
X0073,/* 3/11 73 59 3B semicolon */
X0074,/* 3/12 74 60 3C less-than sign */
X0075,/* 3/13 75 61 3D equals sign */
X0076,/* 3/14 76 62 3E greater-than sign */
X0077,/* 3/15 77 63 3F question mark */
X0100,/* 4/ 0 100 64 40 commercial at */
X0101,/* 4/ 1 101 65 41 A */
X0102,/* 4/ 2 102 66 42 B */
X0103,/* 4/ 3 103 67 43 C */
X0104,/* 4/ 4 104 68 44 D */
X0105,/* 4/ 5 105 69 45 E */
X0106,/* 4/ 6 106 70 46 F */
X0107,/* 4/ 7 107 71 47 G */
X0110,/* 4/ 8 110 72 48 H */
X0111,/* 4/ 9 111 73 49 I */
X0112,/* 4/10 112 74 4A J */
X0113,/* 4/11 113 75 4B K */
X0114,/* 4/12 114 76 4C L */
X0115,/* 4/13 115 77 4D M */
X0116,/* 4/14 116 78 4E N */
X0117,/* 4/15 117 79 4F O */
X0120,/* 5/ 0 120 80 50 P */
X0121,/* 5/ 1 121 81 51 Q */
X0122,/* 5/ 2 122 82 52 R */
X0123,/* 5/ 3 123 83 53 S */
X0124,/* 5/ 4 124 84 54 T */
X0125,/* 5/ 5 125 85 55 U */
X0126,/* 5/ 6 126 86 56 V */
X0127,/* 5/ 7 127 87 57 W */
X0130,/* 5/ 8 130 88 58 X */
X0131,/* 5/ 9 131 89 59 Y */
X0132,/* 5/10 132 90 5A Z */
X0304,/* 5/11 133 91 5B left square bracket */
X0326,/* 5/12 134 92 5C reverse solidus */
X0305,/* 5/13 135 93 5D right square bracket */
X0136,/* 5/14 136 94 5E circumflex accent */
X0137,/* 5/15 137 95 5F low line, underline */
X0351,/* 6/ 0 140 96 60 grave accent */
X0141,/* 6/ 1 141 97 61 a */
X0142,/* 6/ 2 142 98 62 b */
X0143,/* 6/ 3 143 99 63 c */
X0144,/* 6/ 4 144 100 64 d */
X0145,/* 6/ 5 145 101 65 e */
X0146,/* 6/ 6 146 102 66 f */
X0147,/* 6/ 7 147 103 67 g */
X0150,/* 6/ 8 150 104 68 h */
X0151,/* 6/ 9 151 105 69 i */
X0152,/* 6/10 152 106 6A j */
X0153,/* 6/11 153 107 6B k */
X0154,/* 6/12 154 108 6C l */
X0155,/* 6/13 155 109 6D m */
X0156,/* 6/14 156 110 6E n */
X0157,/* 6/15 157 111 6F o */
X0160,/* 7/ 0 160 112 70 p */
X0161,/* 7/ 1 161 113 71 q */
X0162,/* 7/ 2 162 114 72 r */
X0163,/* 7/ 3 163 115 73 s */
X0164,/* 7/ 4 164 116 74 t */
X0165,/* 7/ 5 165 117 75 u */
X0166,/* 7/ 6 166 118 76 v */
X0167,/* 7/ 7 167 119 77 w */
X0170,/* 7/ 8 170 120 78 x */
X0171,/* 7/ 9 171 121 79 y */
X0172,/* 7/10 172 122 7A z */
X0344,/* 7/11 173 123 7B left curly bracket */
X0366,/* 7/12 174 124 7C vertical line */
X0345,/* 7/13 175 125 7D right curly bracket */
X0176,/* 7/14 176 126 7E tilde */
X0177,/* 7/15 177 127 7F DEL (delete) */
X0200,/* 8/ 0 200 128 80 */
X0201,/* 8/ 1 201 129 81 */
X0202,/* 8/ 2 202 130 82 */
X0203,/* 8/ 3 203 131 83 */
X0204,/* 8/ 4 204 132 84 */
X0205,/* 8/ 5 205 133 85 */
X0206,/* 8/ 6 206 134 86 */
X0207,/* 8/ 7 207 135 87 */
X0210,/* 8/ 8 210 136 88 */
X0211,/* 8/ 9 211 137 89 */
X0212,/* 8/10 212 138 8A */
X0213,/* 8/11 213 139 8B */
X0214,/* 8/12 214 140 8C */
X0215,/* 8/13 215 141 8D */
X0216,/* 8/14 216 142 8E */
X0217,/* 8/15 217 143 8F */
X0220,/* 9/ 0 220 144 90 */
X0221,/* 9/ 1 221 145 91 */
X0222,/* 9/ 2 222 146 92 */
X0223,/* 9/ 3 223 147 93 */
X0224,/* 9/ 4 224 148 94 */
X0225,/* 9/ 5 225 149 95 */
X0226,/* 9/ 6 226 150 96 */
X0227,/* 9/ 7 227 151 97 */
X0230,/* 9/ 8 230 152 98 */
X0231,/* 9/ 9 231 153 99 */
X0232,/* 9/10 232 154 9A */
X0233,/* 9/11 233 155 9B */
X0234,/* 9/12 234 156 9C */
X0235,/* 9/13 235 157 9D */
X0236,/* 9/14 236 158 9E */
X0237,/* 9/15 237 159 9F */
X0240,/*10/ 0 240 160 A0 NBSP (no-break space) */
X0241,/*10/ 1 241 161 A1 inverted exclamation mark */
X0242,/*10/ 2 242 162 A2 cent sign */
X0243,/*10/ 3 243 163 A3 pound sign */
X0244,/*10/ 4 244 164 A4 general currency sign */
X0245,/*10/ 5 245 165 A5 yen sign */
X0246,/*10/ 6 246 166 A6 broken vertical line */
X0247,/*10/ 7 247 167 A7 section sign */
X0250,/*10/ 8 250 168 A8 diaeresis */
X0251,/*10/ 9 251 169 A9 copyright sign */
X0252,/*10/10 252 170 AA ordinal indicator, feminine */
X0253,/*10/11 253 171 AB angle quotation mark left */
X0254,/*10/12 254 172 AC not sign */
X0255,/*10/13 255 173 AD soft hyphen */
X0256,/*10/14 256 174 AE registered sign */
X0257,/*10/15 257 175 AF macron */
X0260,/*11/ 0 260 176 B0 degree sign */
X0261,/*11/ 1 261 177 B1 plus or minus sign */
X0262,/*11/ 2 262 178 B2 superscript two */
X0263,/*11/ 3 263 179 B3 superscript three */
X0264,/*11/ 4 264 180 B4 acute accent */
X0265,/*11/ 5 265 181 B5 micro sign */
X0266,/*11/ 6 266 182 B6 pilcrow */
X0267,/*11/ 7 267 183 B7 middle dot */
X0270,/*11/ 8 270 184 B8 cedilla */
X0271,/*11/ 9 271 185 B9 superscript one */
X0272,/*11/10 272 186 BA ordinal indicator, masculine */
X0273,/*11/11 273 187 BB angle quotation mark right */
X0274,/*11/12 274 188 BC fraction one-quarter */
X0275,/*11/13 275 189 BD fraction one-half */
X0276,/*11/14 276 190 BE fraction three-quarters */
X0277,/*11/15 277 191 BF inverted question mark */
X0300,/*12/ 0 300 192 C0 capital A with grave accent */
X0301,/*12/ 1 301 193 C1 capital A with acute accent */
X0302,/*12/ 2 302 194 C2 capital A with circumflex accent */
X0303,/*12/ 3 303 195 C3 capital A with tilde */
X0304,/*12/ 4 304 196 C4 capital A with diaeresis or umlaut mark */
X0305,/*12/ 5 305 197 C5 capital A with ring */
X0306,/*12/ 6 306 198 C6 capital AE diphthong */
X0307,/*12/ 7 307 199 C7 capital C with cedilla */
X0310,/*12/ 8 310 200 C8 capital E with grave accent */
X0311,/*12/ 9 311 201 C9 capital E with acute accent */
X0312,/*12/10 312 202 CA capital E with circumflex accent */
X0313,/*12/11 313 203 CB capital E with diaeresis or umlaut mark */
X0314,/*12/12 314 204 CC capital I with grave accent */
X0315,/*12/13 315 205 CD capital I with acute accent */
X0316,/*12/14 316 206 CE capital I with circumflex accent */
X0317,/*12/15 317 207 CF capital I with diaeresis or umlaut mark */
X0320,/*13/ 0 320 208 D0 capital D with stroke, Icelandic eth */
X0321,/*13/ 1 321 209 D1 capital N with tilde */
X0322,/*13/ 2 322 210 D2 capital O with grave accent */
X0323,/*13/ 3 323 211 D3 capital O with acute accent */
X0324,/*13/ 4 324 212 D4 capital O with circumflex accent */
X0325,/*13/ 5 325 213 D5 capital O with tilde */
X0326,/*13/ 6 326 214 D6 capital O with diaeresis or umlaut mark */
X0327,/*13/ 7 327 215 D7 multiplication sign */
X0330,/*13/ 8 330 216 D8 capital O with slash */
X0331,/*13/ 9 331 217 D9 capital U with grave accent */
X0332,/*13/10 332 218 DA capital U with acute accent */
X0333,/*13/11 333 219 DB capital U with circumflex accent */
X0334,/*13/12 334 220 DC capital U with diaeresis or umlaut mark */
X0335,/*13/13 335 221 DD capital Y with acute accent */
X0336,/*13/14 336 222 DE capital thorn, Icelandic */
X0337,/*13/15 337 223 DF small sharp s, German */
X0340,/*14/ 0 340 224 E0 small a with grave accent */
X0341,/*14/ 1 341 225 E1 small a with acute accent */
X0342,/*14/ 2 342 226 E2 small a with circumflex accent */
X0343,/*14/ 3 343 227 E3 small a with tilde */
X0344,/*14/ 4 344 228 E4 small a with diaeresis or umlaut mark */
X0345,/*14/ 5 345 229 E5 small a with ring */
X0346,/*14/ 6 346 230 E6 small ae diphthong */
X0347,/*14/ 7 347 231 E7 small c with cedilla */
X0350,/*14/ 8 350 232 E8 small e with grave accent */
X0351,/*14/ 9 351 233 E9 small e with acute accent */
X0352,/*14/10 352 234 EA small e with circumflex accent */
X0353,/*14/11 353 235 EB small e with diaeresis or umlaut mark */
X0354,/*14/12 354 236 EC small i with grave accent */
X0355,/*14/13 355 237 ED small i with acute accent */
X0356,/*14/14 356 238 EE small i with circumflex accent */
X0357,/*14/15 357 239 EF small i with diaeresis or umlaut mark */
X0360,/*15/ 0 360 240 F0 small d with stroke, Icelandic eth */
X0361,/*15/ 1 361 241 F1 small n with tilde */
X0362,/*15/ 2 362 242 F2 small o with grave accent */
X0363,/*15/ 3 363 243 F3 small o with acute accent */
X0364,/*15/ 4 364 244 F4 small o with circumflex accent */
X0365,/*15/ 5 365 245 F5 small o with tilde */
X0366,/*15/ 6 366 246 F6 small o with diaeresis or umlaut mark */
X0367,/*15/ 7 367 247 F7 division sign */
X0370,/*15/ 8 370 248 F8 small o with slash */
X0371,/*15/ 9 371 249 F9 small u with grave accent */
X0372,/*15/10 372 250 FA small u with acute accent */
X0373,/*15/11 373 251 FB small u with circumflex accent */
X0374,/*15/12 374 252 FC small u with diaeresis or umlaut mark */
X0375,/*15/13 375 253 FD small y with acute accent */
X0376,/*15/14 376 254 FE small thorn, Icelandic */
X0377,/*15/15 377 255 FF small y with diaeresis or umlaut mark */
X};
X
X#include "78common.h"
X
X/* Different sections in a file.  seus() also uses these values to index
X * the string "?hbsf" when tagging its debug output. */
X#define S_HDR 1 /* News article header.*/
X#define S_BODY 2 /* News article body.*/
X#define S_SIG 3 /* News article signature.*/
X
X/* Tunable parameters.  Where main() sets one from a command line option,
X * the option letter is given in parentheses. */
XPRIVATE double attack = 0.65; /* Smoothing factor: scales the score when a word scores above zero (-A).*/
XPRIVATE double blank = 0.7; /* Scale attack/decay on blank lines (-B).*/
XPRIVATE double bodval = -200.0; /* Score at start of body (-b).*/
XPRIVATE double colon = 0.5; /* Scale attack/decay after colon (-c).*/
XPRIVATE boolT debug = FALSE; /* Debug flag: trace each scored word on stderr (-d).*/
XPRIVATE double decay = 0.67; /* Smoothing factor: scales the score when a word scores zero or below (-D).*/
XPRIVATE boolT fixbody = FALSE; /* Ordinary file, no header or signature (-f).*/
XPRIVATE double headval = 0.0; /* Score set on a header line that contains a colon (-h).*/
XPRIVATE boolT mailbox = FALSE; /* Converting a mailbox: a "From " line starts a new header (-m).*/
XPRIVATE double pound1 = -350.0; /* Score ceiling after # (or X) at beginning of a body line (-#).*/
XPRIVATE unsigned siglns = 9; /* Max lines in a signature (-s).*/
XPRIVATE double thresh = 0.0; /* Score above this is Swedish (-t).*/
XPRIVATE triDifT seustt[TRIMAX + 1];/* Trigram difference table; handed to mrdtri() in main() and to dif78() in seus().*/
X
X/* Word lists that override the score in seus(); each is terminated by
X * NULBSTR and is passed to wordp() (defined elsewhere) together with the
X * current word.  The words are spelled in Swedish ISO 646, where se8[]
X * turns } into a-ring, { into a-diaeresis and | into o-diaeresis. */
XPRIVATE bStrT sewords[] = /* These are always Swedish.*/
X {
X S("D}"), /* D, a-ring */
X S("p}"), /* p, a-ring */
X S("s}"), /* s, a-ring */
X S("{r"), /* a-diaeresis, r */
X S("|ver"), /* o-diaeresis, v, e, r */
X NULBSTR
X };
X
X/* Words kept unconverted even when the running score is above thresh
X * (unless they also appear in sewords[]). */
XPRIVATE bStrT uswords[] = /* These are never Swedish.*/
X {
X S("[]"),
X S("[The"),
X NULBSTR
X };
X
X#include "78heur.h"
X
X/* seus - run heuristics on one file */
X
XPRIVATE void seus (is, fn)
XR9 streamT is; /* Input stream.*/
X bStrT fn; /* File name; passed on to getlin().*/
X
X/* Function:
X *    Copy the input stream to standard output one line at a time,
X *    converting the words judged to be Swedish from ISO 646 to
X *    ISO 8859/1 through se8[].
X * Algorithm:
X *    Read each line into lb[1...].  Depending on the current section
X *    (header, body, signature) look for a section transition and adjust
X *    the cumulative score and the scan start.  Then step through the
X *    line.  At each position first try the special-case recognizers
X *    (BraceP() ... LaTeXP()); a match skips to the pointer they return
X *    and abandons any word in progress.  Otherwise collect bytes whose
X *    trigram code is <= TRIHI into a word.  When a word ends, score it
X *    with dif78(), fold the result into the cumulative score, and
X *    convert the word in place if the score is above thresh and the
X *    word is not in uswords[], or if the word is in sewords[].
X *    Finally write the line with puts().
X * Returns:
X *    Nothing.
X * Notes:
X *    The whole line is written, including any part skipped over by the
X *    header or quote handling; only the scan start moves.
X */
X{
XR2 rcharT b; /* Current input byte.*/
XR4 int i; /* General purpose; holds the dif78() score of a word.*/
X double cum = 0.0; /* Cumulative score.*/
X unsigned ln = 0; /* Input line number.*/
X int lns = -1; /* Value from Lines: header field; -1 = unknown.*/
XR8 unsigned sigln = 1; /* Line number in signature.*/
XR5 bStrT p1; /* Rest of line after special match.*/
XR1 bStrT lp; /* Steps through lb[].*/
XR7 boolT sigbeg; /* Line looks like start of signature.*/
XR3 bStrT wp = NULBSTR; /* Points to start of word.*/
XR6 unsigned sect; /* Current section.*/
X byteT lb[MLINE + 1]; /* Line buffer.*/
X
Xsect = (fixbody ? S_BODY : S_HDR);
X/* lb[0] holds a space in front of every line; presumably this lets the
X * heuristics look one byte before the line start -- TODO confirm. */
Xlb[0] = ' ';
X/* NOTE(review): the line is stored from lb[1], leaving exactly MLINE
X * bytes of lb[]; confirm that getlin() counts the terminating EOS
X * within the MLINE it is given. */
Xwhile (NULBSTR != (getlin ((lp = &lb[1]), MLINE, is, fn, &ln, 0)))
X   {
X   if (mailbox && (NULBSTR != prefix (S("From "), lp)))
X      {
X      /* New message in a mailbox: start over in the header.*/
X      cum = 0.0;
X      ln = 1;
X      lns = -1;
X      sect = S_HDR;
X      sigln = 1;
X      sigbeg = FALSE;
X      }
X   else
X      sigbeg = SigBegP (lp);
X   switch (sect)
X      {
X      case S_HDR:
X         if (EOS == B(*lp))
X            {
X            /* Empty line ends the header.*/
X            sect = S_BODY;
X            cum = bodval;
X            ln = 0;
X            }
X         else
X            {
X            if (NULBSTR != (p1 = prefix (S("Lines: "), lp)))
X               (void) a2i (p1, NULBSTR, TRUE, &lns, (bStrT *) NULL);
X            if (NULBSTR != (p1 = bStrChr (lp, ':')))
X               {
X               /* Scan only what follows the colon.*/
X               cum = headval;
X               lp = p1 + 1;
X               }
X            }
X         break;
X      case S_BODY:
X         /* The signature starts at a signature marker, or within the
X          * last siglns lines when the Lines: count is known.  lns is
X          * signed and siglns unsigned, so the unknown (-1) case is
X          * tested explicitly instead of relying on unsigned wrap-around.*/
X         if (sigbeg || (!fixbody && (lns >= 0) &&
X                        ((unsigned) lns > siglns) &&
X                        (ln > ((unsigned) lns - siglns))))
X            sect = S_SIG;
X         else
X            {
X            /* Skip leading '>' marks.*/
X            for (; '>' == B(*lp); ++lp)
X               ;
X            if (('#' == B(*lp)) || ('X' == B(*lp)))
X               cum = MIN (cum, pound1);
X            }
X         break;
X      case S_SIG:
X         if (sigbeg)
X            sigln = 1;
X         else if (sigln <= siglns)
X            ++sigln;
X         else
X            {
X            /* Too long for a signature: back to the body.*/
X            sigln = 1;
X            sect = S_BODY;
X            }
X         break;
X      default:
X         malf1 (eIntern, "seus 1");
X         break;
X      }
X   /* Nothing left to scan on this line: pull the score toward zero.*/
X   if (EOS == B(*lp)) cum *= blank * ((cum > thresh) ? decay : attack);
X   do
X      {
X      b = B(*lp);
X      if ((NULBSTR != (p1 = BraceP (lp, sect))) ||
X          (NULBSTR != (p1 = UunetP (lp))) ||
X          (NULBSTR != (p1 = IPP (lp))) ||
X          (NULBSTR != (p1 = InArtP (lp, sect))) ||
X          (NULBSTR != (p1 = GrafP (lp, sect))) ||
X          (NULBSTR != (p1 = PipeP (lp, sect))) ||
X          (NULBSTR != (p1 = EndP (lp, sect, S(") writes:")))) ||
X          (NULBSTR != (p1 = EndP (lp, sect, S(" \\n\\")))) ||
X          (NULBSTR != (p1 = LaTeXP (lp))))
X         {
X         /* Special construct: skip it and drop any word in progress.*/
X         lp = p1;
X         wp = NULBSTR;
X         }
X      else
X         {
X         if (byte2t[b] <= TRIHI)
X            {
X            /* Word byte: remember where the word began.*/
X            if (NULBSTR == wp) wp = lp;
X            }
X         else
X            {
X            if (NULBSTR != wp)
X               {
X               /* The word [wp, lp) just ended: score it.*/
X               i = dif78 (wp, lp, seustt);
X               cum *= ((i > 0) ? attack : decay);
X               cum += i;
X               if (((cum > thresh) && !wordp (wp, lp, uswords)) ||
X                   wordp (wp, lp, sewords))
X                  {
X                  for (p1 = wp; p1 != lp; ++p1)
X                     *p1 = se8[B(*p1)];
X                  }
X               if (debug)
X                  /* The '*' precision must be an int, not a pointer
X                   * difference, hence the cast.*/
X                  FPRINTF (stderr, "%c%6.0f %.*s\n", "?hbsf"[sect], cum,
X                           (int) (lp - wp), wp);
X               wp = NULBSTR;
X               if (':' == b)
X                  cum *= colon * ((cum > thresh) ? decay : attack);
X               }
X            }
X         ++lp;
X         }
X      }
X   while (EOS != b);
X   puts (&lb[1]);
X   }
X}
X
X/* main - main function */
X
XPUBLIC int main (argc, argv)
X int argc; /* Number of arguments.*/
XR3 bStrT *argv; /* Points to array of argument strings.*/
X
X/* Function:
X *    Program entry point: set the tunable parameters from the command
X *    line, then convert standard input to standard output.
X * Algorithm:
X *    Decode the options with getopt().  Call usage() if any argument
X *    is left over.  Call ipath(), hand the trigram table to mrdtri(),
X *    run seus() on standard input, flush standard output and exit.
X * Notes:
X *    Does not return; the return statement exists only for lint.
X */
X
X{
XR1 rcharT opt; /* Option letter from getopt().*/
XR2 bStrT extra; /* First argument after the options, if any.*/
Xextern int optind; /* See getopt (3).*/
Xextern cStrT optarg; /* See getopt (3).*/
X
Xfor (;;)
X   {
X   opt = getopt (argc, (cStrT *) argv, "#:A:B:D:b:c:dfh:ms:t:");
X   if (EOF == opt) break;
X   switch (opt)
X      {
X      case '?':
X         usage();
X         break;
X
X      /* Flags.*/
X      case 'd':
X         debug = TRUE;
X         break;
X      case 'f':
X         fixbody = TRUE;
X         break;
X      case 'm':
X         mailbox = TRUE;
X         break;
X
X      /* Smoothing factors, range checked.*/
X      case 'A':
X         attack = mra2d ((bStrT) optarg, NULBSTR, FALSE, "Attack", 0.001, 0.999,
X                         (bStrT *) NULL);
X         break;
X      case 'D':
X         decay = mra2d ((bStrT) optarg, NULBSTR, FALSE, "Decay", 0.001, 0.999,
X                        (bStrT *) NULL);
X         break;
X
X      /* Other scaling factors.*/
X      case 'B':
X         blank = ma2d ((bStrT) optarg, NULBSTR, FALSE, S("Blank smoothing"),
X                       (bStrT *) NULL);
X         break;
X      case 'c':
X         colon = ma2d ((bStrT) optarg, NULBSTR, FALSE, S("Colon Smoothing"),
X                       (bStrT *) NULL);
X         break;
X
X      /* Score values.*/
X      case '#':
X         pound1 = ma2d ((bStrT) optarg, NULBSTR, FALSE, "# Value",
X                        (bStrT *) NULL);
X         break;
X      case 'b':
X         bodval = ma2d ((bStrT) optarg, NULBSTR, FALSE, "Body Value",
X                        (bStrT *) NULL);
X         break;
X      case 'h':
X         headval = ma2d ((bStrT) optarg, NULBSTR, FALSE, S("Header Value"),
X                         (bStrT *) NULL);
X         break;
X      case 't':
X         thresh = ma2d ((bStrT) optarg, NULBSTR, FALSE, "Threshold",
X                        (bStrT *) NULL);
X         break;
X
X      /* Signature length, range checked.*/
X      case 's':
X         siglns = mra2u ((bStrT) optarg, NULBSTR, FALSE, "Max signature lines",
X                         (unsigned) 1, (unsigned) 99, (bStrT *) NULL);
X         break;
X
X      default:
X         malf1 (eIntern, "main 1");
X         break;
X      }
X   }
X
X/* No arguments are accepted after the options.*/
Xextra = argv[optind];
Xif (NULBSTR != extra) usage();
X
Xipath();
Xmrdtri (S("seus"), (bStrT) seustt);
Xseus (stdin, S("Standard Input"));
Xmfflush (stdout, "Standard Output");
Xexit (SUCCESS);
X
X#ifdef lint
Xreturn (SUCCESS);
X#endif
X}
END_OF_FILE
if test 50344 -ne `wc -c <'78seus.c'`; then
echo shar: \"'78seus.c'\" unpacked with wrong size!
fi
# end of '78seus.c'
fi
# Announce that this part is done and leave a marker file for it.
echo shar: End of archive 10 \(of 14\).
cp /dev/null ark10isdone

# Collect the numbers of the parts whose marker file is absent.
MISSING=""
for I in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 ; do
    test -f ark${I}isdone || MISSING="${MISSING} ${I}"
done

# Either list what is still missing, or report success and remove the markers.
if test "${MISSING}" != "" ; then
    echo You still need to unpack the following archives:
    echo " " ${MISSING}
else
    echo You have unpacked all 14 archives.
    rm -f ark[1-9]isdone ark[1-9][0-9]isdone
fi
## End of shell archive.
exit 0
More information about the Comp.sources.misc
mailing list