Compare commits

...

1 Commits

Author SHA1 Message Date
  Ralph Rönnquist 25cbcb24c1 deb-ized 5 months ago
18 changed files with 167 additions and 16 deletions
Split View
  1. +26
    -3
      Makefile
  2. +32
    -8
      README.adoc
  3. +40
    -0
      bigrep.8.adoc
  4. +4
    -5
      bigrep.c
  5. +1
    -0
      debian/.debhelper/bigrep/dbgsym-build-ids
  6. +12
    -0
      debian/.debhelper/bigrep/dbgsym-root/DEBIAN/control
  7. +1
    -0
      debian/.debhelper/bigrep/dbgsym-root/DEBIAN/md5sums
  8. BIN
      debian/.debhelper/bigrep/dbgsym-root/usr/lib/debug/.build-id/b7/2d04dde19ff3f87f1b9326ceea8c7cb2f84a3c.debug
  9. +1
    -0
      debian/.debhelper/bigrep/dbgsym-root/usr/share/doc/bigrep-dbgsym
  10. +0
    -0
      debian/.debhelper/generated/bigrep/installed-by-dh_installdocs
  11. +0
    -0
      debian/.debhelper/generated/bigrep/installed-by-dh_installman
  12. +5
    -0
      debian/changelog
  13. +1
    -0
      debian/compat
  14. +17
    -0
      debian/control
  15. +19
    -0
      debian/copyright
  16. +3
    -0
      debian/files
  17. +4
    -0
      debian/rules
  18. +1
    -0
      debian/source/format

+ 26
- 3
Makefile View File

@@ -1,7 +1,30 @@
CFLAGS = -g -Wall
USRBINDIR = $(DESTDIR)/usr/bin
MAN8DIR = $(DESTDIR)/usr/share/man/man8

USRBINFILES = bigrep
MAN8FILES = bigrep.8

default: $(USRBINFILES) $(MAN8FILES)

$(MAN8FILES): %: %.adoc
a2x -d manpage -f manpage $^

bigrep: bigrep.c bigram.c list.c Bigram.h List.h
$(CC) $(CFLAGS) $^ -o $@
$(CC) -g -Wall $^ -o $@

clean:
rm -f bigrep
rm -f bigrep $(MAN8FILES)

# Installation

INSTALLTARGETS = $(addprefix $(USRBINDIR)/,$(USRBINFILES))
INSTALLTARGETS += $(addprefix $(MAN8DIR)/,$(MAN8FILES))

$(USRBINDIR)/% $(MAN8DIR)/%: %
install -D -T $< $@

install: $(INSTALLTARGETS)

# Manual .deb package building (deposits into parent directory)
deb:
PREFIX= INCLUDE_PREFIX=/usr dpkg-buildpackage -us -uc --build=full

+ 32
- 8
README.adoc View File

@@ -1,14 +1,38 @@
bigrep
======

The is a flexible text search tool.
This is a text search tool using "bigram matching". Here this means that
both the search string and the text files are treated as successions
of overlapping pairs of characters. A match allows the search string
bigrams to be separated or dropped, with a "badness points" cost
attached to each such action.

bigrep [ options ] text directories-and-files
=> count:file:line: text-with-bigram-highlights
For example, assume the search string is "hello" and the target text
line is "also here we are mellow", then +bigrep+ would render the
following match options:
----
*** 12: he:5 el:18 ll:19 lo:20
*** 13: he:5 el:18 ll:19
*** 14: he:5 ll:19 lo:20
*** 14: he:5 el:18
*** 14: he:5 el:18 lo:20
*** 15: he:5 ll:19
*** 16: he:5 lo:20
----

options:
-d N cost of bigram drop
-s N cost of bigram separation
-p N required percentage of bigram matches
-F flow matching = treating newline as space
The first result above would place all search bigrams into the text
line, at a total cost of 12 "badness points", which is due to the
displacement of the "el" bigram. The second result has the same placements
but drops the last search bigram, which attracts the total "badness
points" cost of 13 instead. And so forth.

The default cost parameter settings are:
----
drop_cost = 1; // Cost of dropping another bigram
space_cost = 0; // Cost of dropping a bigram starting with space
displace_cost = 1; // Cost of displacing bigram other than the first
threshold_cost = 20; // Threshold for keeping option
----

+displace_cost+ is a factor that gets multiplied with the amount of
displacement.

+ 40
- 0
bigrep.8.adoc View File

@@ -0,0 +1,40 @@
bigrep(8)
=========
:doctype: manpage
:revdate: {sys:date "+%Y-%m-%d %H:%M:%S"}
:COLON: :
:EQUALS: =

NAME
----
bigrep - grep text lines using bigram matching

SYNOPSIS
--------
*bigrep* [ OPTIONS ] _text_ _file_ [ _file_* ]

DESCRIPTION
-----------
*bigrep* is a text search tool to "grep" with "bigram matching". The
input files are processed one by one in order, line-wise, and
matching lines are printed prefixed by the filename.

OPTIONS
-------

*--cost=*_d_:_s_:_m_:_t_::

The optional first parameter declares the "badness points" costs
for not matching bigrams. The cost settings are:

* _d_ - cost of ignoring a search bigram
* _s_ - cost of ignoring a search bigram that starts with space
* _m_ - cost per character of "moving" a search bigram forward to match
* _t_ - cost threshold for including a text line in the output

The default setting is: 1:0:1:20


AUTHOR
------
Ralph Ronnquist <ralph.ronnquist@gmail.com>

+ 4
- 5
bigrep.c View File

@@ -12,21 +12,20 @@ static void usage() {
exit( 1 );
}

#if 1
/**
* tell all placement options.
*/
#if 0
static void tell(PlacementOptions *places,int at,int left) {
Placement *op;
for ( ; places ; places = places->next ) {
fprintf( stdout, "= %d:", places->value );
fprintf( stdout, "*** %d:", places->value );
for ( op = places->places ; op; op = op->next ) {
char c1 = op->i / 256;
char c2 = op->i & 0xFF;
fprintf( stdout, " %c%c:%d", c1, c2, op->j );
}
fprintf( stdout, "\n" );
break; // only tell first=best placement option
}
}
#endif
@@ -44,8 +43,8 @@ static void process(char *text,char *pathname) {
PlacementOptions *p = (PlacementOptions*) places->first;
if ( p->places ) {
fprintf( stdout, "%s:%s", pathname, line );
#if 0
tell( (PlacementOptions*) places->first, -1, 0, line );
#if 1
tell( (PlacementOptions*) places->first, -1, 0 );
#endif
PlacementOptionsList_free( places );
}


+ 1
- 0
debian/.debhelper/bigrep/dbgsym-build-ids View File

@@ -0,0 +1 @@
b72d04dde19ff3f87f1b9326ceea8c7cb2f84a3c

+ 12
- 0
debian/.debhelper/bigrep/dbgsym-root/DEBIAN/control View File

@@ -0,0 +1,12 @@
Package: bigrep-dbgsym
Source: bigrep
Version: 0.1
Auto-Built-Package: debug-symbols
Architecture: amd64
Maintainer: Ralph Ronnquist <ralph.ronnquist@gmail.com>
Installed-Size: 21
Depends: bigrep (= 0.1)
Section: debug
Priority: optional
Description: debug symbols for bigrep
Build-Ids: b72d04dde19ff3f87f1b9326ceea8c7cb2f84a3c

+ 1
- 0
debian/.debhelper/bigrep/dbgsym-root/DEBIAN/md5sums View File

@@ -0,0 +1 @@
d1199ac114ed7da07a39bd2aaa538144 usr/lib/debug/.build-id/b7/2d04dde19ff3f87f1b9326ceea8c7cb2f84a3c.debug

BIN
debian/.debhelper/bigrep/dbgsym-root/usr/lib/debug/.build-id/b7/2d04dde19ff3f87f1b9326ceea8c7cb2f84a3c.debug View File


+ 1
- 0
debian/.debhelper/bigrep/dbgsym-root/usr/share/doc/bigrep-dbgsym View File

@@ -0,0 +1 @@
bigrep

+ 0
- 0
debian/.debhelper/generated/bigrep/installed-by-dh_installdocs View File


+ 0
- 0
debian/.debhelper/generated/bigrep/installed-by-dh_installman View File


+ 5
- 0
debian/changelog View File

@@ -0,0 +1,5 @@
bigrep (0.1) unstable; urgency=medium

* initial

-- Ralph Ronnquist <ralph.ronnquist@gmail.com> Wed, 28 Apr 2021 21:56:08 +1000

+ 1
- 0
debian/compat View File

@@ -0,0 +1 @@
9

+ 17
- 0
debian/control View File

@@ -0,0 +1,17 @@
Source: bigrep
Section: text
Priority: optional
Maintainer: Ralph Ronnquist <ralph.ronnquist@gmail.com>
Build-Depends: debhelper (>= 9), asciidoc, docbook-xml, libxslt1-dev, xsltproc,
docbook-xsl
Standards-Version: 3.9.8
Homepage: https://git.devuan.org/devuan/bigrep.git
Vcs-Git: https://git.devuan.org/devuan/bigrep.git

Package: bigrep
Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends}
Description: grep text lines using bigram matching
bigrep is a text search tool to "grep" with "bigram matching". The
input files are processed one by one in order, line-wise, and
matching lines are printed prefixed by the filename.

+ 19
- 0
debian/copyright View File

@@ -0,0 +1,19 @@
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: bigrep
Source: https://git.devuan.org/devuan/bigrep.git

Files: *
Copyright: 2021, Ralph Ronnquist <ralph.ronnquist@gmail.com>
License: GPL-2
This package is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License. It is
distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.
.
See the GNU General Public License for more details. You should have
received a copy of the GNU General Public License along with this
program. If not, see <https://www.gnu.org/licenses/>. On Debian
systems, the complete text of the GNU General Public License version
2 can be found in "/usr/share/common-licenses/GPL-2".

+ 3
- 0
debian/files View File

@@ -0,0 +1,3 @@
bigrep-dbgsym_0.1_amd64.deb debug optional automatic=yes
bigrep_0.1_amd64.buildinfo text optional
bigrep_0.1_amd64.deb text optional

+ 4
- 0
debian/rules View File

@@ -0,0 +1,4 @@
#!/usr/bin/make -f

%:
dh $@

+ 1
- 0
debian/source/format View File

@@ -0,0 +1 @@
3.0 (native)

Loading…
Cancel
Save