You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

89 lines
2.1 KiB

  /**
   * Apply bigram matching of a search text to each line of the text files
   * named on the command line. Only named files are read; stdin is not.
   */
  4. #include <stdlib.h>
  5. #include <stdio.h>
  6. #include <string.h>
  7. #include "Bigram.h"
  /**
   * Write the expected command-line syntax to stderr and terminate the
   * program with exit status 1. Never returns.
   */
  static void usage() {
      fputs( "Usage: [ --cost=dr:sp:di:th ] text files...\n", stderr );
      exit( 1 );
  }
  /**
   * Debug helper: print the first placement option of a list to stdout.
   *
   * Output is one line of the form "= <value>: <c1><c2>:<j> ...", with one
   * " <c1><c2>:<j>" entry per Placement in the option's `places` chain.
   * <c1> and <c2> are the high and low byte of the placement's `i` field
   * (presumably the two characters of the bigram -- TODO confirm against
   * Bigram.h); <j> is printed as an integer (presumably a position).
   *
   * @param places  head of the PlacementOptions chain; only the first
   *                element is printed because the loop breaks after it.
   * @param at      unused in this implementation.
   * @param left    unused in this implementation.
   *
   * NOTE: the whole function is compiled out by the surrounding `#if 0`.
   */
  #if 0
  static void tell(PlacementOptions *places,int at,int left) {
      Placement *op;
      for ( ; places ; places = places->next ) {
          fprintf( stdout, "= %d:", places->value );
          for ( op = places->places ; op; op = op->next ) {
              // Split the packed value into its two bytes for printing.
              char c1 = op->i / 256;
              char c2 = op->i & 0xFF;
              fprintf( stdout, " %c%c:%d", c1, c2, op->j );
          }
          fprintf( stdout, "\n" );
          break; // only tell first=best placement option
      }
  }
  #endif
  30. static void process(char *text,char *pathname) {
  31. FILE *file = fopen( pathname, "r" );
  32. if ( file ) {
  33. //fprintf( stderr, "processing %s\n", pathname );
  34. size_t sz = 0;
  35. char *line = 0;
  36. int lineno = 1;
  37. while ( ( getline( &line, &sz, file ) ) >= 0 ) {
  38. PlacementOptionsList *places = bigram_places( text, line );
  39. if ( places && places->first && places->first ) {
  40. PlacementOptions *p = (PlacementOptions*) places->first;
  41. if ( p->places ) {
  42. fprintf( stdout, "%s:%s", pathname, line );
  43. #if 0
  44. tell( (PlacementOptions*) places->first, -1, 0, line );
  45. #endif
  46. PlacementOptionsList_free( places );
  47. }
  48. }
  49. free( line );
  50. line = 0;
  51. sz = 0;
  52. lineno++;
  53. }
  54. }
  55. }
  // Prefix of the optional first command-line argument that overrides the
  // cost parameters below, given as --cost=dr:sp:di:th.
  #define COSTOPT "--cost="
  // Cost parameters declared here and defined outside this file (presumably
  // by the bigram implementation -- confirm against Bigram.h). main() assigns
  // them from the --cost option in the order they are listed.
  extern int drop_cost; // Cost of dropping another bigram
  extern int space_cost; // Cost of dropping a bigram starting with space
  extern int displace_cost; // Cost of displacing bigram other than the first
  extern int threshold_cost; // Threshold for keeping option
  61. int main(int argc,char **argv) {
  62. if ( argc < 2 ) {
  63. usage(); // exits
  64. }
  65. int i = 1;
  66. char *text;
  67. if ( strncmp( argv[i], COSTOPT, strlen( COSTOPT ) ) == 0 ) {
  68. if ( sscanf( argv[i] + strlen( COSTOPT ), "%d:%d:%d:%d",
  69. &drop_cost, &space_cost, &displace_cost,
  70. &threshold_cost ) != 4 ) {
  71. usage();
  72. }
  73. i++;
  74. }
  75. text = argv[i++];
  76. for ( ; i < argc; i ++ ) {
  77. process( text, argv[i] );
  78. }
  79. return 0;
  80. }