commit 9d1e619ff359b6e609b02f01e36952e603104bc6 Author: Andrew Gallant Date: Sat Feb 27 11:07:26 2016 -0500 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..579d99f2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +.*.swp +tags +target +*.lock +tmp +*.csv +*.fst +*-got +*.csv.idx +words +98m* +dict +test +months diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..b26fdbbd --- /dev/null +++ b/.travis.yml @@ -0,0 +1,12 @@ +language: rust +rust: + - stable + - beta + - nightly +script: + - cargo build --verbose + - cargo doc + - cargo test --verbose + - if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then + cargo bench --verbose; + fi diff --git a/COPYING b/COPYING new file mode 100644 index 00000000..bb9c20a0 --- /dev/null +++ b/COPYING @@ -0,0 +1,3 @@ +This project is dual-licensed under the Unlicense and MIT licenses. + +You may use this code under the terms of either license. diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..e562a584 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "rep" +version = "0.1.0" #:version +authors = ["Andrew Gallant "] +description = """ +Line oriented search tool using Rust's regex library. 
+""" +documentation = "https://github.com/BurntSushi/rep" +homepage = "https://github.com/BurntSushi/rep" +repository = "https://github.com/BurntSushi/rep" +readme = "README.md" +keywords = ["regex", "grep", "egrep", "search", "pattern"] +license = "Unlicense/MIT" + +[dependencies] +docopt = "0.6" +regex = { version = "0.1", path = "/home/andrew/rust/regex" } +rustc-serialize = "0.3" + +[profile.release] +debug = true diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 00000000..3b0a5dc0 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Andrew Gallant + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 00000000..be907687 --- /dev/null +++ b/README.md @@ -0,0 +1,63 @@ +fst +=== +This crate provides a fast implementation of ordered sets and maps using finite +state machines. 
In particular, it makes use of finite state transducers to map +keys to values as the machine is executed. Using finite state machines as data +structures enables us to store keys in a compact format that is also easily +searchable. For example, this crate leverages memory maps to make range queries, +regular expression queries and Levenshtein (edit) distance queries very fast. + +Check out my blog post +[Index 1,600,000,000 Keys with Automata and +Rust](http://blog.burntsushi.net/transducers/) +for extensive background, examples and experiments. + +[![Linux build status](https://api.travis-ci.org/BurntSushi/fst.png)](https://travis-ci.org/BurntSushi/fst) +[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/fst?svg=true)](https://ci.appveyor.com/project/BurntSushi/fst) +[![](http://meritbadge.herokuapp.com/fst)](https://crates.io/crates/fst) + +Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). + + +### Documentation + +[Full API documentation and examples.](http://burntsushi.net/rustdoc/fst/) + + +### Installation + +Simply add a corresponding entry to your `Cargo.toml` dependency list: + +```ignore +[dependencies] +fst = "0.1" +``` + +And add this to your crate root: + +```ignore +extern crate fst; +``` + + +### Example + +This example demonstrates building a set in memory and executing a fuzzy query +against it. Check out the documentation for a lot more examples! + +```rust +use fst::{IntoStreamer, Streamer, Levenshtein, Set}; + +// A convenient way to create sets in memory. +let keys = vec!["fa", "fo", "fob", "focus", "foo", "food", "foul"]; +let set = try!(Set::from_iter(keys)); + +// Build our fuzzy query. +let lev = try!(Levenshtein::new("foo", 1)); + +// Apply our fuzzy query to the set we built. 
+let mut stream = set.search(lev).into_stream(); + +let keys = try!(stream.into_strs()); +assert_eq!(keys, vec!["fo", "fob", "foo", "food"]); +``` diff --git a/UNLICENSE b/UNLICENSE new file mode 100644 index 00000000..68a49daa --- /dev/null +++ b/UNLICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
+ +For more information, please refer to <http://unlicense.org/> diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 00000000..ebb689b1 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,17 @@ +environment: + matrix: + - TARGET: x86_64-pc-windows-msvc + - TARGET: i686-pc-windows-gnu +install: + - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-nightly-${env:TARGET}.exe" + - rust-nightly-%TARGET%.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust" + - SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin + - SET PATH=%PATH%;C:\MinGW\bin + - rustc -V + - cargo -V + +build: false + +test_script: + - cargo build --verbose + - cargo test --verbose diff --git a/ctags.rust b/ctags.rust new file mode 100644 index 00000000..b42edf75 --- /dev/null +++ b/ctags.rust @@ -0,0 +1,11 @@ +--langdef=Rust +--langmap=Rust:.rs +--regex-Rust=/^[ \t]*(#\[[^\]]\][ \t]*)*(pub[ \t]+)?(extern[ \t]+)?("[^"]+"[ \t]+)?(unsafe[ \t]+)?fn[ \t]+([a-zA-Z0-9_]+)/\6/f,functions,function definitions/ +--regex-Rust=/^[ \t]*(pub[ \t]+)?type[ \t]+([a-zA-Z0-9_]+)/\2/T,types,type definitions/ +--regex-Rust=/^[ \t]*(pub[ \t]+)?enum[ \t]+([a-zA-Z0-9_]+)/\2/g,enum,enumeration names/ +--regex-Rust=/^[ \t]*(pub[ \t]+)?struct[ \t]+([a-zA-Z0-9_]+)/\2/s,structure names/ +--regex-Rust=/^[ \t]*(pub[ \t]+)?mod[ \t]+([a-zA-Z0-9_]+)/\2/m,modules,module names/ +--regex-Rust=/^[ \t]*(pub[ \t]+)?static[ \t]+([a-zA-Z0-9_]+)/\2/c,consts,static constants/ +--regex-Rust=/^[ \t]*(pub[ \t]+)?trait[ \t]+([a-zA-Z0-9_]+)/\2/t,traits,traits/ +--regex-Rust=/^[ \t]*(pub[ \t]+)?impl([ \t\n]+<.*>)?[ \t]+([a-zA-Z0-9_]+)/\3/i,impls,trait implementations/ +--regex-Rust=/^[ \t]*macro_rules![ \t]+([a-zA-Z0-9_]+)/\1/d,macros,macro definitions/ diff --git a/session.vim b/session.vim new file mode 100644 index 00000000..213c9566 --- /dev/null +++ b/session.vim @@ -0,0 +1 @@ +au BufWritePost *.rs silent!make ctags > /dev/null 2>&1 diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 00000000..62fe205c --- /dev/null +++ 
b/src/main.rs @@ -0,0 +1,94 @@
+#![allow(dead_code)]
+
+extern crate docopt;
+extern crate regex;
+extern crate rustc_serialize;
+
+/// Docopt usage string; `<pattern>` and `<file>` decode into the `arg_pattern`
+/// and `arg_file` fields of `Args`.
+const USAGE: &'static str = "
+Usage: rep [options] <pattern> [<file> ...]
+";
+
+use std::error::Error;
+use std::io::{self, BufRead, Write};
+use std::process;
+use std::result;
+
+use docopt::Docopt;
+use regex::internal::{ExecBuilder, Search};
+
+/// Result type used by this program; any error is boxed.
+type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
+
+/// Command line arguments, decoded from `USAGE` by Docopt.
+#[derive(RustcDecodable)]
+struct Args {
+    /// The regular expression to search for.
+    arg_pattern: String,
+    /// Files named on the command line. Currently unused: `run` only reads
+    /// stdin.
+    arg_file: Vec<String>,
+}
+
+/// Parses the command line, runs the search and exits the process.
+///
+/// Exits with status 0 if at least one line matched and 1 otherwise. Errors
+/// are printed to stderr and also exit with status 1.
+fn main() {
+    let args = Docopt::new(USAGE).and_then(|d| d.decode())
+                                 .unwrap_or_else(|e| e.exit());
+    match run(&args) {
+        Ok(0) => process::exit(1),
+        Ok(_) => process::exit(0),
+        Err(err) => {
+            let _ = writeln!(&mut io::stderr(), "{}", err);
+            process::exit(1);
+        }
+    }
+}
+
+/// Searches stdin line by line for `args.arg_pattern`.
+///
+/// Each matching line is written to stdout as `N:LINE`, where `N` is the
+/// 1-based line number. Lines are read and written as raw bytes.
+///
+/// Returns the number of matching lines. Fails if the pattern cannot be
+/// built or if reading stdin or writing stdout fails.
+fn run(args: &Args) -> Result<u64> {
+    let stdin = io::stdin();
+    let mut rdr = io::BufReader::new(stdin.lock());
+    let mut wtr = io::BufWriter::new(io::stdout());
+    let mut count: u64 = 0;
+    let mut nline: u64 = 0;
+    let mut line = vec![];
+    let re = try!(ExecBuilder::new(&args.arg_pattern).only_utf8(false).build());
+    let mut search = Search {
+        captures: &mut [],
+        matches: &mut [false],
+    };
+    loop {
+        line.clear();
+        if try!(rdr.read_until(b'\n', &mut line)) == 0 {
+            break;
+        }
+        nline += 1;
+        // Drop the terminator; the last line of input may not have one.
+        if line.last() == Some(&b'\n') {
+            line.pop();
+        }
+        search.matches[0] = false;
+        if re.exec(&mut search, &line, 0) {
+            count += 1;
+            // `write_all`, not `write`: a single `write` call may accept
+            // only part of the buffer, which would truncate the output.
+            try!(write!(wtr, "{}:", nline));
+            try!(wtr.write_all(&line));
+            try!(wtr.write_all(b"\n"));
+        }
+    }
+    // Flush explicitly so that a failed write is reported instead of being
+    // ignored when the `BufWriter` is dropped.
+    try!(wtr.flush());
+    Ok(count)
+}