mirror of
				https://github.com/BurntSushi/ripgrep.git
				synced 2025-10-30 23:17:47 +02:00 
			
		
		
		
	
				
					committed by
					
						 Andrew Gallant
						Andrew Gallant
					
				
			
			
				
	
			
			
			
						parent
						
							32db773d51
						
					
				
				
					commit
					02de97b8ce
				
			
							
								
								
									
										7
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										7
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -2,6 +2,7 @@ | ||||
| name = "ripgrep" | ||||
| version = "0.2.6" | ||||
| dependencies = [ | ||||
|  "bytecount 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
|  "ctrlc 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
|  "docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
|  "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
| @@ -28,6 +29,11 @@ dependencies = [ | ||||
|  "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", | ||||
| ] | ||||
|  | ||||
| [[package]] | ||||
| name = "bytecount" | ||||
| version = "0.1.4" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
|  | ||||
| [[package]] | ||||
| name = "crossbeam" | ||||
| version = "0.2.10" | ||||
| @@ -282,6 +288,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
|  | ||||
| [metadata] | ||||
| "checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" | ||||
| "checksum bytecount 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49e3c21915578e2300b08d3c174a8ac887e0c6421dff86fdc4d741dc29e5d413" | ||||
| "checksum crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0c5ea215664ca264da8a9d9c3be80d2eaf30923c259d03e870388eb927508f97" | ||||
| "checksum ctrlc 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "77f98bb69e3fefadcc5ca80a1368a55251f70295168203e01165bcaecb270891" | ||||
| "checksum docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)" = "4a7ef30445607f6fc8720f0a0a2c7442284b629cf0d049286860fae23e71c4d9" | ||||
|   | ||||
| @@ -24,6 +24,7 @@ name = "integration" | ||||
| path = "tests/tests.rs" | ||||
|  | ||||
| [dependencies] | ||||
| bytecount = "0.1.4" | ||||
| ctrlc = "2.0" | ||||
| docopt = "0.6" | ||||
| env_logger = "0.3" | ||||
|   | ||||
| @@ -4,6 +4,8 @@ printing matches. In particular, it searches the file in a streaming fashion | ||||
| using `read` calls and a (roughly) fixed size buffer. | ||||
| */ | ||||
|  | ||||
| extern crate bytecount; | ||||
|  | ||||
| use std::cmp; | ||||
| use std::error::Error as StdError; | ||||
| use std::fmt; | ||||
| @@ -582,89 +584,9 @@ pub fn is_binary(buf: &[u8]) -> bool { | ||||
| } | ||||
|  | ||||
| /// Count the number of lines in the given buffer. | ||||
| #[inline(never)] | ||||
|  | ||||
| #[inline(never)] | ||||
| pub fn count_lines(buf: &[u8], eol: u8) -> u64 { | ||||
|     // This was adapted from code in the memchr crate. The specific benefit | ||||
|     // here is that we can avoid a branch in the inner loop because all we're | ||||
|     // doing is counting. | ||||
|  | ||||
|     // The technique to count EOL bytes was adapted from: | ||||
|     // http://bits.stephan-brumme.com/null.html | ||||
|     const LO_U64: u64 = 0x0101010101010101; | ||||
|     const HI_U64: u64 = 0x8080808080808080; | ||||
|  | ||||
|     // use truncation | ||||
|     const LO_USIZE: usize = LO_U64 as usize; | ||||
|     const HI_USIZE: usize = HI_U64 as usize; | ||||
|  | ||||
|     #[cfg(target_pointer_width = "32")] | ||||
|     const USIZE_BYTES: usize = 4; | ||||
|     #[cfg(target_pointer_width = "64")] | ||||
|     const USIZE_BYTES: usize = 8; | ||||
|  | ||||
|     fn count_eol(eol: usize) -> u64 { | ||||
|         // Ideally, this would compile down to a POPCNT instruction, but | ||||
|         // it looks like you need to set RUSTFLAGS="-C target-cpu=native" | ||||
|         // (or target-feature=+popcnt) to get that to work. Bummer. | ||||
|         (eol.wrapping_sub(LO_USIZE) & !eol & HI_USIZE).count_ones() as u64 | ||||
|     } | ||||
|  | ||||
|     #[cfg(target_pointer_width = "32")] | ||||
|     fn repeat_byte(b: u8) -> usize { | ||||
|         let mut rep = (b as usize) << 8 | b as usize; | ||||
|         rep = rep << 16 | rep; | ||||
|         rep | ||||
|     } | ||||
|  | ||||
|     #[cfg(target_pointer_width = "64")] | ||||
|     fn repeat_byte(b: u8) -> usize { | ||||
|         let mut rep = (b as usize) << 8 | b as usize; | ||||
|         rep = rep << 16 | rep; | ||||
|         rep = rep << 32 | rep; | ||||
|         rep | ||||
|     } | ||||
|  | ||||
|     fn count_lines_slow(mut buf: &[u8], eol: u8) -> u64 { | ||||
|         let mut count = 0; | ||||
|         while let Some(pos) = memchr(eol, buf) { | ||||
|             count += 1; | ||||
|             buf = &buf[pos + 1..]; | ||||
|         } | ||||
|         count | ||||
|     } | ||||
|  | ||||
|     let len = buf.len(); | ||||
|     let ptr = buf.as_ptr(); | ||||
|     let mut count = 0; | ||||
|  | ||||
|     // Search up to an aligned boundary... | ||||
|     let align = (ptr as usize) & (USIZE_BYTES - 1); | ||||
|     let mut i = 0; | ||||
|     if align > 0 { | ||||
|         i = cmp::min(USIZE_BYTES - align, len); | ||||
|         count += count_lines_slow(&buf[..i], eol); | ||||
|     } | ||||
|  | ||||
|     // ... and search the rest. | ||||
|     let repeated_eol = repeat_byte(eol); | ||||
|  | ||||
|     if len >= 2 * USIZE_BYTES { | ||||
|         while i <= len - (2 * USIZE_BYTES) { | ||||
|             unsafe { | ||||
|                 let u = *(ptr.offset(i as isize) as *const usize); | ||||
|                 let v = *(ptr.offset((i + USIZE_BYTES) as isize) | ||||
|                           as *const usize); | ||||
|  | ||||
|                 count += count_eol(u ^ repeated_eol); | ||||
|                 count += count_eol(v ^ repeated_eol); | ||||
|             } | ||||
|             i += USIZE_BYTES * 2; | ||||
|         } | ||||
|     } | ||||
|     count += count_lines_slow(&buf[i..], eol); | ||||
|     count | ||||
|     bytecount::count(buf, eol) as u64 | ||||
| } | ||||
|  | ||||
| /// Replaces a with b in buf. | ||||
|   | ||||
| @@ -789,6 +789,15 @@ clean!(regression_127, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { | ||||
|     assert_eq!(lines, expected); | ||||
| }); | ||||
|  | ||||
| // See: https://github.com/BurntSushi/ripgrep/issues/128 | ||||
| clean!(regression_128, "x", ".", |wd: WorkDir, mut cmd: Command| { | ||||
|     wd.create_bytes("foo", b"01234567\x0b\n\x0b\n\x0b\n\x0b\nx"); | ||||
|     cmd.arg("-n"); | ||||
|  | ||||
|     let lines: String = wd.stdout(&mut cmd); | ||||
|     assert_eq!(lines, "foo:5:x\n"); | ||||
| }); | ||||
|  | ||||
| // See: https://github.com/BurntSushi/ripgrep/issues/131 | ||||
| // | ||||
| // TODO(burntsushi): Darwin doesn't like this test for some reason. | ||||
|   | ||||
| @@ -43,9 +43,14 @@ impl WorkDir { | ||||
|  | ||||
|     /// Create a new file with the given name and contents in this directory. | ||||
|     pub fn create<P: AsRef<Path>>(&self, name: P, contents: &str) { | ||||
|         self.create_bytes(name, contents.as_bytes()); | ||||
|     } | ||||
|  | ||||
|     /// Create a new file with the given name and contents in this directory. | ||||
|     pub fn create_bytes<P: AsRef<Path>>(&self, name: P, contents: &[u8]) { | ||||
|         let path = self.dir.join(name); | ||||
|         let mut file = nice_err(&path, File::create(&path)); | ||||
|         nice_err(&path, file.write_all(contents.as_bytes())); | ||||
|         nice_err(&path, file.write_all(contents)); | ||||
|         nice_err(&path, file.flush()); | ||||
|     } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user