/* token822.c (11376B) */
1 #include "token822.h" 2 3 #include "stralloc.h" 4 #include "str.h" 5 #include "gen_allocdefs.h" 6 7 static struct token822 comma = { TOKEN822_COMMA }; 8 9 void token822_reverse(ta) 10 token822_alloc *ta; 11 { 12 int i; 13 int n; 14 struct token822 temp; 15 16 n = ta->len - 1; 17 for (i = 0;i + i < n;++i) 18 { 19 temp = ta->t[i]; 20 ta->t[i] = ta->t[n - i]; 21 ta->t[n - i] = temp; 22 } 23 } 24 25 GEN_ALLOC_readyplus(token822_alloc,struct token822,t,len,a,30,token822_readyplus) 26 GEN_ALLOC_ready(token822_alloc,struct token822,t,len,a,30,token822_ready) 27 GEN_ALLOC_append(token822_alloc,struct token822,t,len,a,30,token822_readyplus,token822_append) 28 29 static int needspace(t1,t2) 30 int t1; 31 int t2; 32 { 33 if (!t1) return 0; 34 if (t1 == TOKEN822_COLON) return 1; 35 if (t1 == TOKEN822_COMMA) return 1; 36 if (t2 == TOKEN822_LEFT) return 1; 37 switch(t1) 38 { 39 case TOKEN822_ATOM: case TOKEN822_LITERAL: 40 case TOKEN822_QUOTE: case TOKEN822_COMMENT: 41 switch(t2) 42 { 43 case TOKEN822_ATOM: case TOKEN822_LITERAL: 44 case TOKEN822_QUOTE: case TOKEN822_COMMENT: 45 return 1; 46 } 47 } 48 return 0; 49 } 50 51 static int atomok(ch) 52 char ch; 53 { 54 switch(ch) 55 { 56 case ' ': case '\t': case '\r': case '\n': 57 case '(': case '[': case '"': 58 case '<': case '>': case ';': case ':': 59 case '@': case ',': case '.': 60 return 0; 61 } 62 return 1; 63 } 64 65 static void atomcheck(t) 66 struct token822 *t; 67 { 68 int i; 69 char ch; 70 for (i = 0;i < t->slen;++i) 71 { 72 ch = t->s[i]; 73 if ((ch < 32) || (ch > 126) || (ch == ')') || (ch == ']') || (ch == '\\')) 74 { 75 t->type = TOKEN822_QUOTE; 76 return; 77 } 78 } 79 } 80 81 int token822_unparse(sa,ta,linelen) 82 stralloc *sa; 83 token822_alloc *ta; 84 unsigned int linelen; 85 { 86 struct token822 *t; 87 int len; 88 int ch; 89 int i; 90 int j; 91 int lasttype; 92 int newtype; 93 char *s; 94 char *lineb; 95 char *linee; 96 97 len = 0; 98 lasttype = 0; 99 for (i = 0;i < ta->len;++i) 100 { 101 t = ta->t + i; 102 
newtype = t->type; 103 if (needspace(lasttype,newtype)) 104 ++len; 105 lasttype = newtype; 106 switch(newtype) 107 { 108 case TOKEN822_COMMA: 109 len += 3; break; 110 case TOKEN822_AT: case TOKEN822_DOT: case TOKEN822_LEFT: case TOKEN822_RIGHT: 111 case TOKEN822_SEMI: case TOKEN822_COLON: 112 ++len; break; 113 case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT: 114 if (t->type != TOKEN822_ATOM) len += 2; 115 for (j = 0;j < t->slen;++j) 116 switch(ch = t->s[j]) 117 { 118 case '"': case '[': case ']': case '(': case ')': 119 case '\\': case '\r': case '\n': ++len; 120 default: ++len; 121 } 122 break; 123 } 124 } 125 len += 2; 126 127 if (!stralloc_ready(sa,len)) 128 return -1; 129 130 s = sa->s; 131 lineb = s; 132 linee = 0; 133 134 lasttype = 0; 135 for (i = 0;i < ta->len;++i) 136 { 137 t = ta->t + i; 138 newtype = t->type; 139 if (needspace(lasttype,newtype)) 140 *s++ = ' '; 141 lasttype = newtype; 142 switch(newtype) 143 { 144 case TOKEN822_COMMA: 145 *s++ = ','; 146 #define NSUW \ 147 s[0] = '\n'; s[1] = ' '; \ 148 if (linee && (!linelen || (s - lineb <= linelen))) \ 149 { while (linee < s) { linee[0] = linee[2]; ++linee; } linee -= 2; } \ 150 else { if (linee) lineb = linee + 1; linee = s; s += 2; } 151 NSUW 152 break; 153 case TOKEN822_AT: *s++ = '@'; break; 154 case TOKEN822_DOT: *s++ = '.'; break; 155 case TOKEN822_LEFT: *s++ = '<'; break; 156 case TOKEN822_RIGHT: *s++ = '>'; break; 157 case TOKEN822_SEMI: *s++ = ';'; break; 158 case TOKEN822_COLON: *s++ = ':'; break; 159 case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: case TOKEN822_COMMENT: 160 if (t->type == TOKEN822_QUOTE) *s++ = '"'; 161 if (t->type == TOKEN822_LITERAL) *s++ = '['; 162 if (t->type == TOKEN822_COMMENT) *s++ = '('; 163 for (j = 0;j < t->slen;++j) 164 switch(ch = t->s[j]) 165 { 166 case '"': case '[': case ']': case '(': case ')': 167 case '\\': case '\r': case '\n': *s++ = '\\'; 168 default: *s++ = ch; 169 } 170 if (t->type == 
TOKEN822_QUOTE) *s++ = '"'; 171 if (t->type == TOKEN822_LITERAL) *s++ = ']'; 172 if (t->type == TOKEN822_COMMENT) *s++ = ')'; 173 break; 174 } 175 } 176 NSUW 177 --s; 178 sa->len = s - sa->s; 179 return 1; 180 } 181 182 int token822_unquote(sa,ta) 183 stralloc *sa; 184 token822_alloc *ta; 185 { 186 struct token822 *t; 187 int len; 188 int i; 189 int j; 190 char *s; 191 192 len = 0; 193 for (i = 0;i < ta->len;++i) 194 { 195 t = ta->t + i; 196 switch(t->type) 197 { 198 case TOKEN822_COMMA: case TOKEN822_AT: case TOKEN822_DOT: case TOKEN822_LEFT: 199 case TOKEN822_RIGHT: case TOKEN822_SEMI: case TOKEN822_COLON: 200 ++len; break; 201 case TOKEN822_LITERAL: 202 len += 2; 203 case TOKEN822_ATOM: case TOKEN822_QUOTE: 204 len += t->slen; 205 } 206 } 207 208 if (!stralloc_ready(sa,len)) 209 return -1; 210 211 s = sa->s; 212 213 for (i = 0;i < ta->len;++i) 214 { 215 t = ta->t + i; 216 switch(t->type) 217 { 218 case TOKEN822_COMMA: *s++ = ','; break; 219 case TOKEN822_AT: *s++ = '@'; break; 220 case TOKEN822_DOT: *s++ = '.'; break; 221 case TOKEN822_LEFT: *s++ = '<'; break; 222 case TOKEN822_RIGHT: *s++ = '>'; break; 223 case TOKEN822_SEMI: *s++ = ';'; break; 224 case TOKEN822_COLON: *s++ = ':'; break; 225 case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: 226 if (t->type == TOKEN822_LITERAL) *s++ = '['; 227 for (j = 0;j < t->slen;++j) 228 *s++ = t->s[j]; 229 if (t->type == TOKEN822_LITERAL) *s++ = ']'; 230 break; 231 case TOKEN822_COMMENT: break; 232 } 233 } 234 sa->len = s - sa->s; 235 return 1; 236 } 237 238 int token822_parse(ta,sa,buf) 239 token822_alloc *ta; 240 stralloc *sa; 241 stralloc *buf; 242 { 243 int i; 244 int salen; 245 int level; 246 struct token822 *t; 247 int numtoks; 248 int numchars; 249 char *cbuf; 250 251 salen = sa->len; 252 253 numchars = 0; 254 numtoks = 0; 255 for (i = 0;i < salen;++i) 256 switch(sa->s[i]) 257 { 258 case '.': case ',': case '@': case '<': case '>': case ':': case ';': 259 ++numtoks; break; 260 case ' ': case '\t': case 
'\r': case '\n': break; 261 case ')': case ']': return 0; 262 /* other control chars and non-ASCII chars are also bad, in theory */ 263 case '(': 264 level = 1; 265 while (level) 266 { 267 if (++i >= salen) return 0; 268 switch(sa->s[i]) 269 { 270 case '(': ++level; break; 271 case ')': --level; break; 272 case '\\': if (++i >= salen) return 0; 273 default: ++numchars; 274 } 275 } 276 ++numtoks; 277 break; 278 case '"': 279 level = 1; 280 while (level) 281 { 282 if (++i >= salen) return 0; 283 switch(sa->s[i]) 284 { 285 case '"': --level; break; 286 case '\\': if (++i >= salen) return 0; 287 default: ++numchars; 288 } 289 } 290 ++numtoks; 291 break; 292 case '[': 293 level = 1; 294 while (level) 295 { 296 if (++i >= salen) return 0; 297 switch(sa->s[i]) 298 { 299 case ']': --level; break; 300 case '\\': if (++i >= salen) return 0; 301 default: ++numchars; 302 } 303 } 304 ++numtoks; 305 break; 306 default: 307 do 308 { 309 if (sa->s[i] == '\\') if (++i >= salen) break; 310 ++numchars; 311 if (++i >= salen) 312 break; 313 } 314 while (atomok(sa->s[i])); 315 --i; 316 ++numtoks; 317 } 318 319 if (!token822_ready(ta,numtoks)) 320 return -1; 321 if (!stralloc_ready(buf,numchars)) 322 return -1; 323 cbuf = buf->s; 324 ta->len = numtoks; 325 326 t = ta->t; 327 for (i = 0;i < salen;++i) 328 switch(sa->s[i]) 329 { 330 case '.': t->type = TOKEN822_DOT; ++t; break; 331 case ',': t->type = TOKEN822_COMMA; ++t; break; 332 case '@': t->type = TOKEN822_AT; ++t; break; 333 case '<': t->type = TOKEN822_LEFT; ++t; break; 334 case '>': t->type = TOKEN822_RIGHT; ++t; break; 335 case ':': t->type = TOKEN822_COLON; ++t; break; 336 case ';': t->type = TOKEN822_SEMI; ++t; break; 337 case ' ': case '\t': case '\r': case '\n': break; 338 case '(': 339 t->type = TOKEN822_COMMENT; t->s = cbuf; t->slen = 0; 340 level = 1; 341 while (level) 342 { 343 ++i; /* assert: < salen */ 344 switch(sa->s[i]) 345 { 346 case '(': ++level; break; 347 case ')': --level; break; 348 case '\\': ++i; /* assert: < 
salen */ 349 default: *cbuf++ = sa->s[i]; ++t->slen; 350 } 351 } 352 ++t; 353 break; 354 case '"': 355 t->type = TOKEN822_QUOTE; t->s = cbuf; t->slen = 0; 356 level = 1; 357 while (level) 358 { 359 ++i; /* assert: < salen */ 360 switch(sa->s[i]) 361 { 362 case '"': --level; break; 363 case '\\': ++i; /* assert: < salen */ 364 default: *cbuf++ = sa->s[i]; ++t->slen; 365 } 366 } 367 ++t; 368 break; 369 case '[': 370 t->type = TOKEN822_LITERAL; t->s = cbuf; t->slen = 0; 371 level = 1; 372 while (level) 373 { 374 ++i; /* assert: < salen */ 375 switch(sa->s[i]) 376 { 377 case ']': --level; break; 378 case '\\': ++i; /* assert: < salen */ 379 default: *cbuf++ = sa->s[i]; ++t->slen; 380 } 381 } 382 ++t; 383 break; 384 default: 385 t->type = TOKEN822_ATOM; t->s = cbuf; t->slen = 0; 386 do 387 { 388 if (sa->s[i] == '\\') if (++i >= salen) break; 389 *cbuf++ = sa->s[i]; ++t->slen; 390 if (++i >= salen) 391 break; 392 } 393 while (atomok(sa->s[i])); 394 atomcheck(t); 395 --i; 396 ++t; 397 } 398 return 1; 399 } 400 401 static int gotaddr(taout,taaddr,callback) 402 token822_alloc *taout; 403 token822_alloc *taaddr; 404 int (*callback)(); 405 { 406 int i; 407 408 if (callback(taaddr) != 1) 409 return 0; 410 411 if (!token822_readyplus(taout,taaddr->len)) 412 return 0; 413 414 for (i = 0;i < taaddr->len;++i) 415 taout->t[taout->len++] = taaddr->t[i]; 416 417 taaddr->len = 0; 418 return 1; 419 } 420 421 int token822_addrlist(taout,taaddr,ta,callback) 422 token822_alloc *taout; 423 token822_alloc *taaddr; 424 token822_alloc *ta; 425 int (*callback)(); 426 { 427 struct token822 *t; 428 struct token822 *beginning; 429 int ingroup; 430 int wordok; 431 432 taout->len = 0; 433 taaddr->len = 0; 434 435 if (!token822_readyplus(taout,1)) return -1; 436 if (!token822_readyplus(taaddr,1)) return -1; 437 438 ingroup = 0; 439 wordok = 1; 440 441 beginning = ta->t + 2; 442 t = ta->t + ta->len - 1; 443 444 /* rfc 822 address lists are easy to parse from right to left */ 445 446 #define FLUSH if 
(taaddr->len) if (!gotaddr(taout,taaddr,callback)) return -1; 447 #define FLUSHCOMMA if (taaddr->len) { \ 448 if (!gotaddr(taout,taaddr,callback)) return -1; \ 449 if (!token822_append(taout,&comma)) return -1; } 450 #define ADDRLEFT if (!token822_append(taaddr,t--)) return -1; 451 #define OUTLEFT if (!token822_append(taout,t--)) return -1; 452 453 while (t >= beginning) 454 { 455 switch(t->type) 456 { 457 case TOKEN822_SEMI: 458 FLUSHCOMMA 459 if (ingroup) return 0; 460 ingroup = 1; 461 wordok = 1; 462 break; 463 case TOKEN822_COLON: 464 FLUSH 465 if (!ingroup) return 0; 466 ingroup = 0; 467 while ((t >= beginning) && (t->type != TOKEN822_COMMA)) 468 OUTLEFT 469 if (t >= beginning) 470 OUTLEFT 471 wordok = 1; 472 continue; 473 case TOKEN822_RIGHT: 474 FLUSHCOMMA 475 OUTLEFT 476 while ((t >= beginning) && (t->type != TOKEN822_LEFT)) 477 ADDRLEFT 478 /* important to use address here even if it's empty: <> */ 479 if (!gotaddr(taout,taaddr,callback)) return -1; 480 if (t < beginning) return 0; 481 OUTLEFT 482 while ((t >= beginning) && ((t->type == TOKEN822_COMMENT) || (t->type == TOKEN822_ATOM) || (t->type == TOKEN822_QUOTE) || (t->type == TOKEN822_AT) || (t->type == TOKEN822_DOT))) 483 OUTLEFT 484 wordok = 0; 485 continue; 486 case TOKEN822_ATOM: case TOKEN822_QUOTE: case TOKEN822_LITERAL: 487 if (!wordok) 488 FLUSHCOMMA 489 wordok = 0; 490 ADDRLEFT 491 continue; 492 case TOKEN822_COMMENT: 493 /* comment is lexically a space; shouldn't affect wordok */ 494 break; 495 case TOKEN822_COMMA: 496 FLUSH 497 wordok = 1; 498 break; 499 default: 500 wordok = 1; 501 ADDRLEFT 502 continue; 503 } 504 OUTLEFT 505 } 506 FLUSH 507 ++t; 508 while (t > ta->t) 509 if (!token822_append(taout,--t)) return -1; 510 511 token822_reverse(taout); 512 return 1; 513 }