static char rcsid[] = "$Id: asksam_to_text.c,v 1.2 1992/11/30 22:38:56 putz Exp $";
/***************************************************************************
 * asksam_to_text.c - convert Digital Tradition askSam file to plain text
 * Created by Steve Putz, Sat Nov 7 21:36:30 PST 1992
 ***************************************************************************

   This program worked for the july92 and oct92 versions of The Digital
   Tradition, but might not work for future versions.  The joys of
   reverse engineering.

   Usage (options are recognized only in this order):

       asksam_to_text [-debug] [-nostop] [-tilde C] [infile|-] [outfile|-]

   "-" (or a missing name) means standard input / standard output.

   I haven't really figured out the file header, but it seems to work to
   just skip to the first 033 002 which tends to be the first song.
   Likewise the song records seem to end as soon as a 001 is encountered
   at the top level.

   000 apparently begins a new record, as part of a 10 byte header block.
   The 10 bytes are usually followed by one of the following two byte
   pairs.  The apparent meanings are listed below:

       033 002 - first (or only) record of a document
       033 003 - first (or only) record of a document
       033 022 - first record of a document (not used for songs?)
       033 004 - middle record of a document
       033 005 - middle record of a document
       033 010 - last record of a document
       033 011 - last record of a document
       033 030 - last record of a document (not used for songs?)

   If none of these codes appear, do not consume the two bytes and start
   a new document (as if 033 002 had appeared).  Note that 033 033
   indicates 27 spaces (see below) and is not one of the above codes.

   001 seems to indicate some kind of non-document records which
   apparently occur at the end of the file.  There are also some 001's
   that occur at other places, which I ignore.

   002 to 033 indicate a number of spaces, except 033 is followed by
   another byte indicating the actual number of spaces.

   034 indicates a newline within a record.  Each record (including
   "middle" records) should be terminated by a newline as well.

   0377 seems to be some kind of special separator, mostly occurring
   between "play.exe" and the file name.  REP377 (i.e. space) is
   substituted.

 **************************************************************************/

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TRUE 1
#define FALSE 0

#define RECORDSEP "===DOCUMENT BOUNDARY==="
#define REP377 ' '

/* Write `count` blanks to stdout; a count of zero or less writes nothing. */
static void put_spaces(int count)
{
    while (count-- > 0)
        putchar(' ');
}

/*
 * Convert an askSam database (stdin or argv input file) to plain text
 * (stdout or argv output file).  Documents are separated by RECORDSEP lines.
 *
 * Exit status: 0 on success, 1 for a malformed command line, 2 if the
 * input file cannot be opened, 3 if the output file cannot be opened.
 */
int main(int argc, char *argv[])
{
    const char *program;
    char rep_tilde = '\0';      /* replacement for '~'; '\0' means "keep ~" */
    int stopearly = TRUE;       /* stop at the first top-level 001 record */
    int debug = FALSE;          /* interleave raw-byte diagnostics in stdout */
    int outline = 1;            /* current output line, for stderr messages */
    int ch;

    /* argv[argc] is always NULL, so only step over argv[0] if it exists. */
    program = (argc > 0 && argv[0] != NULL) ? argv[0] : "asksam_to_text";
    if (argc > 0)
        argv++;

    if (*argv && !strcmp(*argv, "-debug")) {
        debug = TRUE;
        argv++;
    }
    if (*argv && !strcmp(*argv, "-nostop")) {
        stopearly = FALSE;
        argv++;
    }
    if (*argv && !strcmp(*argv, "-tilde")) {
        argv++;
        if (*argv == NULL) {
            /* previously dereferenced the terminating NULL pointer */
            fprintf(stderr, "%s: -tilde requires a replacement character\n",
                    program);
            exit(1);
        }
        rep_tilde = **argv;
        argv++;
        fprintf(stderr, "replacing '~' with '%c'\n", rep_tilde);
    }

    if (*argv) {
        if (strcmp(*argv, "-")) {
            /* The input is binary data: text mode would mangle it on
               systems that distinguish the two (e.g. 032 as EOF on DOS). */
            if ((freopen(*argv, "rb", stdin)) == NULL) {
                fprintf(stderr, "%s: unable to open %s\n", program, *argv);
                exit(2);
            }
        }
        argv++;
    }
    if (*argv) {
        if (strcmp(*argv, "-")) {
            if ((freopen(*argv, "w", stdout)) == NULL) {
                fprintf(stderr, "%s: unable to open %s\n", program, *argv);
                exit(3);
            }
        }
        argv++;
    }

    /* skip head and records before first song (the first 033 002 pair) */
    while ((ch = getchar()) != EOF) {
        if (ch != 033)
            continue;
        if ((ch = getchar()) == EOF)
            break;
        if (ch == 002)
            break;
    }

    while ((ch = getchar()) != EOF) {
        if (ch == 000) {
            int i;

            /* 000 opens a 10 byte record header; discard the other 9 */
            if (debug)
                printf("\n[%#03o]", ch);
            for (i = 9; i > 0; i--) {
                if ((ch = getchar()) == EOF)
                    break;
                if (debug)
                    printf("[%#03o]", ch);
            }
            if (ch == EOF)
                break;              /* input ended inside the header */
            if ((ch = getchar()) == EOF)
                break;

            putchar('\n');          /* every record ends with a newline */
            outline++;

            if (ch == 001 && stopearly) {
                puts(RECORDSEP);    /* with final newline */
                outline++;
                break;              /* stop after last song */
            }
            if (ch == 033) {
                int ch2;

                if ((ch2 = getchar()) == EOF)
                    break;
                switch (ch2) {
                case 002:
                case 003:
                case 022:
                    if (debug)
                        printf("(START)\n");
                    puts(RECORDSEP);    /* with final newline */
                    outline++;
                    continue;
                case 004:
                case 005:
                case 010:
                case 011:
                case 024:           /* speculation, never observed */
                case 030:
                    if (debug)
                        printf("(CONTINUE)\n");
                    continue;
                default:
                    if (debug)
                        printf("(OOPS)\n");
                    /* Not a record code: put the byte back so the 033
                       below is decoded as an ordinary space run. */
                    ungetc(ch2, stdin);
                    break;
                }
            }
            /* no recognized code: start a new document anyway */
            puts(RECORDSEP);        /* with final newline */
            outline++;
        }

        if (ch == '~' && rep_tilde) {
            putchar(rep_tilde);
        } else if (isprint(ch)) {
            putchar(ch);
        } else if (ch == 034) {
            putchar('\n');
            outline++;
        } else if (ch == 0377) {
            if (debug)
                printf("[%#03o]", ch);
            else
                putchar(REP377);
        } else if (ch >= 002 && ch <= 033) {
            /* apparently 002 to 032 indicate number of spaces */
            /* and byte after 033 indicates actual number of spaces */
            if (ch == 033) {
                if (debug)
                    printf(">>ESC>");
                if ((ch = getchar()) == EOF)
                    break;
            }
            if (debug)
                printf(">>%d>", ch);
            put_spaces(ch);
        } else {
            if (debug)
                printf("<%#03o>", ch);
            if (ch > 128 && isprint(ch - 128)) {
                putchar(ch - 128);  /* this "bug" seen once in DT data */
                fprintf(stderr, "line %d: converting %#03o to '%c'\n",
                        outline, ch, ch - 128);
            } else if (ch != 001) { /* too many 001 chars to report */
                fprintf(stderr, "line %d: ignoring %#03o\n", outline, ch);
            }
        }
    }

    return 0;
} /* end main */

/***************************************************************************/