pub struct Reader { /* private fields */ }
Expand description
Text file reader. Lines are broken into columns, with lookback to previous lines.
Implementations
sourceimpl Reader
impl Reader
sourcepub const fn loc(&self) -> &FileLocData
pub const fn loc(&self) -> &FileLocData
loc
Examples found in repository
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
/// Command entry point: print the lines of each input file that are accepted
/// by the configured list of line matchers.
///
/// `--pattern` adds a matcher, `--or` switches the list's combiner to
/// `Combiner::Or`, `--invert` selects the lines that do *not* match and
/// `--location` adds location columns in front of each selected line.
/// `--show-const` / `--show-func` print their listing and return immediately.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Select uniq lines.", args::FileCount::Many);
    const A: [ArgSpec; 6] = [
        arg! {"pattern", "p", "Col,Spec,Pattern", "Select line where this col matches this pattern."},
        arg! {"show-const", "", "", "Print available constants"},
        arg! {"show-func", "", "", "Print available functions"},
        arg! {"or", "o", "", "A line matches if any of the matchers matches."},
        arg! {"invert", "v", "", "Print lines that don't match."},
        arg! {"location", "l", "name:what", "prefix extra columns of location context."},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;

    let mut matchers = LineMatcherList::new_with(Combiner::And);
    let mut invert = false;
    let mut locations = FileLocList::new();
    for arg in args {
        match &*arg.name {
            "pattern" => {
                matchers.push(&arg.value)?;
            }
            "or" => {
                matchers.multi = Combiner::Or;
            }
            "invert" => {
                invert = true;
            }
            "location" => {
                locations.push(&arg.value)?;
            }
            "show-const" => {
                expr::show_const();
                return Ok(());
            }
            "show-func" => {
                expr::show_func();
                return Ok(());
            }
            _ => unreachable!(),
        }
    }

    let mut out = get_writer("-")?;
    for file_name in &files {
        let mut reader = Reader::new_open(file_name)?;
        if reader.is_empty() {
            continue;
        }
        matchers.lookup(&reader.names())?;

        // Output header = location columns followed by the input's own columns.
        let mut col_header = ColumnHeader::new();
        locations.add(&mut col_header)?;
        col_header.push_all(reader.header())?;
        let header_text = if reader.has_header() {
            col_header.get_head(b'\t')
        } else {
            String::new()
        };
        if settings.checker.check(header_text.as_bytes(), file_name)? {
            out.write_all(header_text.as_bytes())?;
        }
        if reader.is_done() {
            continue;
        }
        loop {
            let selected = matchers.ok(reader.curr_line()) != invert;
            if selected {
                // write previous lines of context if necessary
                locations.write_data(&mut out.0, b'\t', reader.loc())?;
                reader.write_curr(&mut out.0)?;
            }
            // (for unselected lines: write more lines of context if necessary)
            if reader.getline()? {
                break;
            }
        }
    }
    Ok(())
}
sourcepub fn new() -> Self
pub fn new() -> Self
Make a new Reader.
Examples found in repository
594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
/// Insert every non-empty line of the file `fname` into `data`, without its
/// trailing newline.
///
/// The file is read through a `Reader` with column splitting turned off,
/// since only whole lines are needed.
///
/// # Errors
/// Propagates any error reported while opening or reading the file.
fn load_hashset(data: &mut HashSet<Vec<u8>>, fname: &str) -> Result<()> {
    let mut f = Reader::new();
    f.do_split(false);
    f.open(fname)?;
    if f.is_done() {
        return Ok(());
    }
    loop {
        let line: &[u8] = f.curr().line();
        // Strip the terminator only when it is really there. Blindly dropping
        // the last byte would eat a real character from a final line that has
        // no trailing newline (and is how `load_hashset_c` already behaves).
        let body = line.strip_suffix(b"\n").unwrap_or(line);
        if !body.is_empty() {
            data.insert(body.to_vec());
        }
        if f.getline()? {
            break;
        }
    }
    Ok(())
}
#[derive(Debug, Clone)]
/// Matcher whose pattern is a file name: a candidate matches when it is
/// exactly equal to one of the lines held in `data`.
struct FileExactMatch {
/// The set of accepted lines, kept as raw bytes.
data: HashSet<Vec<u8>>,
/// Name of the file this matcher was built from.
file_name: String,
}
impl FileExactMatch {
    /// Build a matcher from the lines of `file_name`, loaded via `load_hashset`.
    fn new(file_name: &str) -> Result<Self> {
        let mut data = HashSet::new();
        load_hashset(&mut data, file_name)?;
        let file_name = file_name.to_string();
        Ok(Self { data, file_name })
    }
}
impl Match for FileExactMatch {
    /// True when `buff`, taken as bytes, is one of the stored lines.
    fn smatch(&self, buff: &str) -> bool {
        self.umatch(buff.as_bytes())
    }
    /// True when `buff` is one of the stored lines.
    fn umatch(&self, buff: &[u8]) -> bool {
        self.data.get(buff).is_some()
    }
    /// Human readable description of this matcher.
    fn show(&self) -> String {
        let mut text = String::from("Exact Match of one line in file ");
        text.push_str(&self.file_name);
        text
    }
}
/// Insert a lower-cased copy of each line of `fname` into `data`.
///
/// Lines of length one or less are ignored. A trailing newline, when present,
/// is removed before lower-casing. With `unicode` set the line must be valid
/// UTF-8 and is lower-cased as a `String`; otherwise the bytes are lower-cased
/// directly.
fn load_hashset_c(data: &mut HashSet<Vec<u8>>, fname: &str, unicode: bool) -> Result<()> {
    let mut reader = Reader::new();
    reader.do_split(false);
    reader.open(fname)?;
    if reader.is_done() {
        return Ok(());
    }
    loop {
        let raw: &[u8] = reader.curr().line();
        if raw.len() > 1 {
            let body = raw.strip_suffix(b"\n").unwrap_or(raw);
            let lowered = if unicode {
                // PERF - 2 allocations
                String::from_utf8(body.to_vec())?.new_lower().into_bytes()
            } else {
                body.new_lower()
            };
            data.insert(lowered);
        }
        if reader.getline()? {
            break;
        }
    }
    Ok(())
}
More examples
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
/// Write the first `max_lines` lines of `file` to standard output with rows
/// and columns swapped: one output line per input column.
///
/// When `head` is set the output starts with the ` CDX` marker and a tab. If
/// the input has a header, each output line starts with the column's name.
pub fn transpose(file: &str, head: bool, max_lines: usize) -> Result<()> {
    let mut reader = Reader::new();
    reader.open(file)?;
    if reader.is_empty() {
        return Ok(());
    }

    // Keep a copy of every line we are going to transpose.
    let mut rows = Vec::new();
    while rows.len() < max_lines {
        rows.push(reader.curr().clone());
        if reader.getline()? {
            break;
        }
    }

    let mut out = get_writer("-")?;
    if head {
        out.write_all(b" CDX\t")?;
    }
    let named = reader.has_header();
    for col in 0..reader.header().len() {
        if named {
            out.write_all(reader.header()[col].as_bytes())?;
        }
        // A separator is needed before every value except the very first
        // thing printed on the line.
        let mut need_tab = named;
        for row in &rows {
            if need_tab {
                out.write_all(b"\t")?;
            }
            need_tab = true;
            out.write_all(&row[col])?;
        }
        out.write_all(b"\n")?;
    }
    Ok(())
}
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
/// Command entry point: treat every line of every input file as an
/// expression, evaluate it and print the formatted result.
///
/// The number format starts from `--fmt` (or the default) and is replaced by
/// whatever `parse_fmt_expr` returns for each line. Evaluation errors are
/// reported on standard error and do not stop processing.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Evaluate Formatted Expressions.", args::FileCount::Many);
    const A: [ArgSpec; 1] = [arg! {"fmt", "f", "Format", "How to format values."}];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;

    let mut fmt = NumFormat::default();
    for arg in args {
        match &*arg.name {
            "fmt" => fmt = NumFormat::new(&arg.value)?,
            _ => unreachable!(),
        }
    }

    let mut out = get_writer("-")?;
    for file_name in &files {
        let mut reader = Reader::new();
        reader.open(file_name)?;
        if reader.is_empty() || reader.is_done() {
            continue;
        }
        loop {
            let text = String::from_utf8_lossy(reader.curr_nl());
            let (next_fmt, expression) = parse_fmt_expr(fmt, &text);
            fmt = next_fmt;
            match calc(expression) {
                Err(e) => eprintln!("{}", e),
                Ok(value) => {
                    fmt.print(value, &mut out.0)?;
                    out.write_all(b"\n")?;
                    out.flush()?;
                }
            }
            if reader.getline()? {
                break;
            }
        }
    }
    Ok(())
}
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
/// Print the start of `file` to standard output as an aligned table that fits
/// in `screen`.
///
/// At most `screen.height` lines (header included) are shown. Column widths
/// start at the widest cell and are reduced with `dec_max` until the table
/// fits in `screen.width`, leaving one separator per column. The header row,
/// if any, is centered; all other rows are left aligned.
pub fn show(file: &str, screen: &Rect) -> Result<()> {
    let mut reader = Reader::new();
    reader.open(file)?;
    if reader.is_empty() {
        return Ok(());
    }

    let mut rows: Vec<StringLine> = Vec::new();
    let mut widths: Vec<usize> = vec![0; reader.header().len()];
    if reader.has_header() {
        rows.push(reader.header().clone());
    }
    if !reader.is_done() {
        while rows.len() < screen.height {
            let mut row = StringLine::new();
            row.line = String::from_utf8_lossy(reader.curr().line()).to_string();
            row.split(reader.delim());
            rows.push(row);
            if reader.getline()? {
                break;
            }
        }
    }

    // Widest cell per column.
    for row in &rows {
        for (col, cell) in row.iter().enumerate() {
            let width = UnicodeWidthStr::width(cell);
            widths[col] = widths[col].max(width);
        }
    }

    // Shrink the widest columns until everything fits on the screen.
    let mut total: usize = widths.iter().sum();
    let target = screen.width - widths.len();
    while total > target {
        dec_max(&mut widths);
        total = widths.iter().sum();
    }

    let mut out = get_writer("-")?;
    let mut centered = reader.has_header();
    for row in &rows {
        for (col, cell) in row.iter().enumerate() {
            let (shown, used) = cell.unicode_truncate(widths[col]);
            if col > 0 {
                out.write_all(b" ")?;
            }
            // Centered cells split the padding, others put all of it on the right.
            let slack = widths[col] - used;
            let left = if centered { slack / 2 } else { 0 };
            let right = slack - left;
            for _ in 0..left {
                out.write_all(b" ")?;
            }
            out.write_all(shown.as_bytes())?;
            for _ in 0..right {
                out.write_all(b" ")?;
            }
        }
        // Only the first row (the header, when present) is centered.
        centered = false;
        out.write_all(b"\n")?;
    }
    Ok(())
}
/// Render the start of `file` as an aligned table that fits in `screen`,
/// storing one string per table row in `w` (which is cleared first).
///
/// Layout rules are the same as for `show`: at most `screen.height` rows,
/// widths reduced with `dec_max` until they fit `screen.width`, header row
/// centered. Returns the sum of the final column widths plus the number of
/// columns, or 0 when the file is empty.
pub fn show2(file: &str, screen: &Rect, w: &mut Vec<String>) -> Result<usize> {
    let mut reader = Reader::new();
    reader.open(file)?;
    if reader.is_empty() {
        return Ok(0);
    }

    let mut rows: Vec<StringLine> = Vec::new();
    let mut widths: Vec<usize> = vec![0; reader.header().len()];
    if reader.has_header() {
        rows.push(reader.header().clone());
    }
    if !reader.is_done() {
        while rows.len() < screen.height {
            let mut row = StringLine::new();
            row.line = String::from_utf8_lossy(reader.curr().line()).to_string();
            row.split(reader.delim());
            rows.push(row);
            if reader.getline()? {
                break;
            }
        }
    }

    // Widest cell per column.
    for row in &rows {
        for (col, cell) in row.iter().enumerate() {
            let width = UnicodeWidthStr::width(cell);
            widths[col] = widths[col].max(width);
        }
    }

    // Shrink the widest columns until everything fits on the screen.
    let mut total: usize = widths.iter().sum();
    let target = screen.width - widths.len();
    while total > target {
        dec_max(&mut widths);
        total = widths.iter().sum();
    }

    let mut centered = reader.has_header();
    w.clear();
    for row in &rows {
        let mut text = String::new();
        for (col, cell) in row.iter().enumerate() {
            let (shown, used) = cell.unicode_truncate(widths[col]);
            if col > 0 {
                text.push(' ');
            }
            // Centered cells split the padding, others put all of it on the right.
            let slack = widths[col] - used;
            let left = if centered { slack / 2 } else { 0 };
            let right = slack - left;
            text.extend(std::iter::repeat(' ').take(left));
            text.push_str(shown);
            text.extend(std::iter::repeat(' ').take(right));
        }
        // Only the first row (the header, when present) is centered.
        centered = false;
        w.push(text);
    }
    Ok(widths.iter().sum::<usize>() + widths.len())
}
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
/// Command entry point: write a selection of columns from each input file.
///
/// Every `--fields`, `--group`, `--expr` and `--composite` option appends one
/// output column source to the writer, in command-line order. `--dups` sets
/// how duplicate column names in the output header are handled.
///
/// # Errors
/// Fails when no column source was given, and propagates argument parsing,
/// column lookup and I/O errors.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Select columns", args::FileCount::Many);
    const A: [ArgSpec; 5] = [
        arg! {"fields", "f", "Columns", "the columns to select."},
        arg! {"group", "g", "Columns", "the columns in a bunch, e.g. '.group:1-3'"},
        arg! {"expr", "e", "Name:Expr", "The result of an arithmetic expression"},
        arg! {"composite", "c", "Spec", "new value made from parts. e.g. 'stuff:abc^{two}def'"},
        arg_enum! {"dups", "D", "Mode", "Duplicate Column Handling", &["Fail", "Allow", "Numeric"]},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut header = ColumnHeader::new();
    let mut v = Writer::new(b'\t');
    for x in args {
        match &*x.name {
            "dups" => {
                header.set_handling(DupColHandling::new(&x.value)?);
            }
            "fields" => {
                v.push(Box::new(ReaderColumns::new(ColumnSet::from_spec(
                    &x.value,
                )?)));
            }
            "group" => {
                v.push(Box::new(ColumnClump::from_spec(&x.value)?));
            }
            "expr" => {
                v.push(Box::new(ColumnExpr::new(&x.value)?));
            }
            "composite" => {
                v.push(Box::new(CompositeColumn::new(&x.value)?));
            }
            _ => unreachable!(),
        }
    }
    if v.is_empty() {
        // The message names the options that actually exist in `A`; it used
        // to mention `--columns` / `--groups`, which this tool does not have.
        bail!("cut requires at least one --fields, --group, --expr or --composite");
    }
    let mut w = get_writer("-")?;
    let mut not_header = String::new();
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        v.lookup(&f.names())?;
        // Rebuild the output header for this file from the column sources.
        header.clear();
        not_header.clear();
        v.add_names(&mut header, f.header())?;
        if f.has_header() {
            not_header = header.get_head(b'\t');
        }
        if settings.checker.check(not_header.as_bytes(), x)? {
            w.write_all(not_header.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }
        loop {
            v.write(&mut w.0, f.curr())?;
            if f.getline()? {
                break;
            }
        }
    }
    Ok(())
}
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
/// Merge the two sorted files `left` and `right` into `w`, ordered by `cmp`.
///
/// The header of `left`, if it has one, is written first. With `unique` set,
/// lines that compare equal to the line most recently written by the main
/// loop are skipped; otherwise lines that compare equal are both written,
/// the right one first.
///
/// # Errors
/// Propagates errors from opening/reading the inputs, column lookup and
/// writing to `w`.
pub fn merge_2(
    &self,
    left: &str,
    right: &str,
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
) -> Result<()> {
    let mut left_file = Reader::new();
    let mut right_file = Reader::new();
    left_file.open(left)?;
    right_file.open(right)?;
    left_file.do_split(false);
    right_file.do_split(false);
    cmp.lookup(&left_file.names())?;
    // FIXME -- Check Header
    if left_file.has_header() {
        w.write_all(left_file.header().line.as_bytes())?;
    }
    if unique {
        // `prev` holds the bytes of the line written most recently. It is
        // filled by swapping buffers with the reader that produced the line,
        // just before that reader advances.
        let mut prev: Vec<u8> = Vec::new();
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            match ord {
                Ordering::Less => {
                    left_file.write(&mut w)?;
                    mem::swap(&mut prev, left_file.curr_mut().raw());
                    left_file.getline()?;
                }
                Ordering::Greater => {
                    right_file.write(&mut w)?;
                    // The written line came from the right file, so its buffer
                    // is the one to keep. Swapping with the left reader here
                    // would clobber the still-pending left line and remember
                    // the wrong `prev`.
                    mem::swap(&mut prev, right_file.curr_mut().raw());
                    right_file.getline()?;
                }
                Ordering::Equal => {
                    left_file.write(&mut w)?;
                    mem::swap(&mut prev, left_file.curr_mut().raw());
                    left_file.getline()?;
                    right_file.getline()?;
                }
            }
            // Drop every following line that equals the one just written.
            while !left_file.is_done() && cmp.equal_lines(left_file.curr().line(), &prev) {
                left_file.getline()?;
            }
            while !right_file.is_done() && cmp.equal_lines(right_file.curr().line(), &prev) {
                right_file.getline()?;
            }
        }
    } else {
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            // if Equal, write both lines
            if ord != Ordering::Less {
                right_file.write(&mut w)?;
                right_file.getline()?;
            }
            if ord != Ordering::Greater {
                left_file.write(&mut w)?;
                left_file.getline()?;
            }
        }
    }
    // At most one input still has lines; copy them through.
    // NOTE(review): in `unique` mode these tails are not de-duplicated
    // against each other -- confirm whether that is intended.
    while !left_file.is_done() {
        left_file.write(&mut w)?;
        left_file.getline()?;
    }
    while !right_file.is_done() {
        right_file.write(&mut w)?;
        right_file.getline()?;
    }
    Ok(())
}
sourcepub fn do_split(&mut self, val: bool)
pub fn do_split(&mut self, val: bool)
Set to false to skip breaking lines into columns.
Examples found in repository
594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
/// Insert every non-empty line of the file `fname` into `data`, without its
/// trailing newline.
///
/// The file is read through a `Reader` with column splitting turned off,
/// since only whole lines are needed.
///
/// # Errors
/// Propagates any error reported while opening or reading the file.
fn load_hashset(data: &mut HashSet<Vec<u8>>, fname: &str) -> Result<()> {
    let mut f = Reader::new();
    f.do_split(false);
    f.open(fname)?;
    if f.is_done() {
        return Ok(());
    }
    loop {
        let line: &[u8] = f.curr().line();
        // Strip the terminator only when it is really there. Blindly dropping
        // the last byte would eat a real character from a final line that has
        // no trailing newline (and is how `load_hashset_c` already behaves).
        let body = line.strip_suffix(b"\n").unwrap_or(line);
        if !body.is_empty() {
            data.insert(body.to_vec());
        }
        if f.getline()? {
            break;
        }
    }
    Ok(())
}
#[derive(Debug, Clone)]
/// Matcher whose pattern is a file name: a candidate matches when it is
/// exactly equal to one of the lines held in `data`.
struct FileExactMatch {
/// The set of accepted lines, kept as raw bytes.
data: HashSet<Vec<u8>>,
/// Name of the file this matcher was built from.
file_name: String,
}
impl FileExactMatch {
    /// Build a matcher from the lines of `file_name`, loaded via `load_hashset`.
    fn new(file_name: &str) -> Result<Self> {
        let mut data = HashSet::new();
        load_hashset(&mut data, file_name)?;
        let file_name = file_name.to_string();
        Ok(Self { data, file_name })
    }
}
impl Match for FileExactMatch {
    /// True when `buff`, taken as bytes, is one of the stored lines.
    fn smatch(&self, buff: &str) -> bool {
        self.umatch(buff.as_bytes())
    }
    /// True when `buff` is one of the stored lines.
    fn umatch(&self, buff: &[u8]) -> bool {
        self.data.get(buff).is_some()
    }
    /// Human readable description of this matcher.
    fn show(&self) -> String {
        let mut text = String::from("Exact Match of one line in file ");
        text.push_str(&self.file_name);
        text
    }
}
/// Insert a lower-cased copy of each line of `fname` into `data`.
///
/// Lines of length one or less are ignored. A trailing newline, when present,
/// is removed before lower-casing. With `unicode` set the line must be valid
/// UTF-8 and is lower-cased as a `String`; otherwise the bytes are lower-cased
/// directly.
fn load_hashset_c(data: &mut HashSet<Vec<u8>>, fname: &str, unicode: bool) -> Result<()> {
    let mut reader = Reader::new();
    reader.do_split(false);
    reader.open(fname)?;
    if reader.is_done() {
        return Ok(());
    }
    loop {
        let raw: &[u8] = reader.curr().line();
        if raw.len() > 1 {
            let body = raw.strip_suffix(b"\n").unwrap_or(raw);
            let lowered = if unicode {
                // PERF - 2 allocations
                String::from_utf8(body.to_vec())?.new_lower().into_bytes()
            } else {
                body.new_lower()
            };
            data.insert(lowered);
        }
        if reader.getline()? {
            break;
        }
    }
    Ok(())
}
More examples
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
/// Merge all of `in_files` into `w`, using a heap of reader indices to pick
/// the next line.
///
/// With no input files nothing is written. A single file without `unique` is
/// copied straight through. Otherwise every file is opened with a `Reader`,
/// the header of the first file (if any) is written, and lines are emitted in
/// the order the heap yields them. With `unique` set, a line is written only
/// when `MergeContext::equal` reports it differs from the last written line.
/// `_tmp` is not used here.
pub fn merge_t2(
&self,
in_files: &[String],
cmp: &mut LineCompList,
mut w: impl Write,
unique: bool,
_tmp: &TempDir,
) -> Result<()> {
if in_files.is_empty() {
return Ok(());
}
if in_files.len() == 1 && !unique {
let r = get_reader(&in_files[0])?;
return copy(r.0, w);
}
// The heap's comparison closure needs access to the open readers and the
// comparator, so both live in a shared, interior-mutable MergeContext.
let mc = Rc::new(RefCell::new(MergeContext{open : Vec::with_capacity(in_files.len()), cmp}));
// The heap stores indices into `mc.open`, ordered by MergeContext::compare.
let mut heap = BinaryHeap::new_by(|a: &usize, b: &usize| mc.borrow_mut().compare(*a, *b));
{
// Scoped so this mutable borrow is released before the heap is used
// (the heap's closure borrows `mc` again on every comparison).
let mut mcm = mc.borrow_mut();
for x in in_files {
mcm.open.push(Reader::new_open(x)?);
}
// Skip column splitting when the comparator does not need it.
if !mcm.cmp.need_split() {
for x in &mut mcm.open {
x.do_split(false);
}
}
// FIXME -- Check Header
if mcm.open[0].has_header() {
w.write_all(mcm.open[0].header().line.as_bytes())?;
}
}
// Seed the heap with every reader that still has a line.
for i in 0..in_files.len() {
if !mc.borrow().open[i].is_done() {
heap.push(i)
}
}
if unique {
if heap.is_empty() {
return Ok(());
}
// The first line is always written; `prev` remembers the last written line.
let first = heap.pop().unwrap();
let mut prev = mc.borrow().open[first].curr_line().clone();
// Re-queue the reader unless getline() returned true (the loops in this
// function treat a true result as "no further line").
if !mc.borrow_mut().open[first].getline()? {
heap.push(first);
}
w.write_all(prev.line())?;
while !heap.is_empty() {
if let Some(x) = heap.pop() {
let eq = mc.borrow_mut().equal(&prev, x);
if !eq {
// Immutable borrow limited to this block; it must end before the
// borrow_mut() below.
let mcm = mc.borrow();
w.write_all(mcm.open[x].curr_line().line())?;
prev.assign(mcm.open[x].curr_line());
}
if !mc.borrow_mut().open[x].getline()? {
heap.push(x);
}
}
}
} else {
// Plain merge: write whatever the heap yields, then advance that reader.
while !heap.is_empty() {
if let Some(x) = heap.pop() {
w.write_all(mc.borrow_mut().open[x].curr_line().line())?;
if !mc.borrow_mut().open[x].getline()? {
heap.push(x);
}
}
}
}
Ok(())
}
/// Merge all of `in_files` into `w`, picking the next line with a
/// `MergeTreeItem` tree. `_tmp` is accepted but not used here.
///
/// With no input nothing is written; a single file without `unique` is
/// copied straight through. With `unique` set, a line is written only when it
/// differs (per `cmp.equal_cols`) from the line that preceded it.
pub fn merge_t1 (
    &self,
    in_files: &[String],
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
    _tmp: &TempDir,
) -> Result<()> {
    if in_files.is_empty() {
        return Ok(());
    }
    if !unique && in_files.len() == 1 {
        let r = get_reader(&in_files[0])?;
        return copy(r.0, w);
    }

    let mut open_files: Vec<Reader> = Vec::with_capacity(in_files.len());
    for name in in_files {
        let reader = Reader::new_open(name)?;
        open_files.push(reader);
    }
    if !cmp.need_split() {
        for reader in &mut open_files {
            reader.do_split(false);
        }
    }
    // FIXME -- Check Header
    if open_files[0].has_header() {
        w.write_all(open_files[0].header().line.as_bytes())?;
    }

    let nums: Vec<usize> = (0..open_files.len()).collect();
    let mut mm = MergeTreeItem::new_tree(&open_files, &nums);
    if unique {
        // The very first line is always written and becomes `prev`.
        let first = match mm.next(cmp, &mut open_files)? {
            Some(idx) => idx,
            None => return Ok(()),
        };
        w.write_all(open_files[first].curr_line().line())?;
        let mut prev = open_files[first].curr_line().clone();
        while let Some(idx) = mm.next(cmp, &mut open_files)? {
            let current = open_files[idx].curr_line();
            if !cmp.equal_cols(&prev, current) {
                w.write_all(current.line())?;
            }
            prev.assign(current);
        }
    } else {
        while let Some(idx) = mm.next(cmp, &mut open_files)? {
            w.write_all(open_files[idx].curr_line().line())?;
        }
    }
    Ok(())
}
/// Merge all of `files` into `w`, dispatching to `merge_t1` when
/// `self.alt_merge` is set and to `merge_t2` otherwise. A temporary
/// directory named "merge" is created for the duration of the call.
pub fn merge(&self, files: &[String], cmp: &mut LineCompList, w: impl Write, unique: bool) -> Result<()> {
    let tmp = TempDir::new("merge")?;
    if !self.alt_merge {
        return self.merge_t2(files, cmp, w, unique, &tmp);
    }
    self.merge_t1(files, cmp, w, unique, &tmp)
}
/// Given two file names, merge the sorted files `left` and `right` into `w`,
/// ordered by `cmp`.
///
/// The header of `left`, if it has one, is written first. With `unique` set,
/// lines that compare equal to the line most recently written by the main
/// loop are skipped; otherwise lines that compare equal are both written,
/// the right one first.
///
/// # Errors
/// Propagates errors from opening/reading the inputs, column lookup and
/// writing to `w`.
pub fn merge_2(
    &self,
    left: &str,
    right: &str,
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
) -> Result<()> {
    let mut left_file = Reader::new();
    let mut right_file = Reader::new();
    left_file.open(left)?;
    right_file.open(right)?;
    left_file.do_split(false);
    right_file.do_split(false);
    cmp.lookup(&left_file.names())?;
    // FIXME -- Check Header
    if left_file.has_header() {
        w.write_all(left_file.header().line.as_bytes())?;
    }
    if unique {
        // `prev` holds the bytes of the line written most recently. It is
        // filled by swapping buffers with the reader that produced the line,
        // just before that reader advances.
        let mut prev: Vec<u8> = Vec::new();
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            match ord {
                Ordering::Less => {
                    left_file.write(&mut w)?;
                    mem::swap(&mut prev, left_file.curr_mut().raw());
                    left_file.getline()?;
                }
                Ordering::Greater => {
                    right_file.write(&mut w)?;
                    // The written line came from the right file, so its buffer
                    // is the one to keep. Swapping with the left reader here
                    // would clobber the still-pending left line and remember
                    // the wrong `prev`.
                    mem::swap(&mut prev, right_file.curr_mut().raw());
                    right_file.getline()?;
                }
                Ordering::Equal => {
                    left_file.write(&mut w)?;
                    mem::swap(&mut prev, left_file.curr_mut().raw());
                    left_file.getline()?;
                    right_file.getline()?;
                }
            }
            // Drop every following line that equals the one just written.
            while !left_file.is_done() && cmp.equal_lines(left_file.curr().line(), &prev) {
                left_file.getline()?;
            }
            while !right_file.is_done() && cmp.equal_lines(right_file.curr().line(), &prev) {
                right_file.getline()?;
            }
        }
    } else {
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            // if Equal, write both lines
            if ord != Ordering::Less {
                right_file.write(&mut w)?;
                right_file.getline()?;
            }
            if ord != Ordering::Greater {
                left_file.write(&mut w)?;
                left_file.getline()?;
            }
        }
    }
    // At most one input still has lines; copy them through.
    // NOTE(review): in `unique` mode these tails are not de-duplicated
    // against each other -- confirm whether that is intended.
    while !left_file.is_done() {
        left_file.write(&mut w)?;
        left_file.getline()?;
    }
    while !right_file.is_done() {
        right_file.write(&mut w)?;
        right_file.getline()?;
    }
    Ok(())
}
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
/// Command entry point: write a sample of the lines of each input file.
///
/// Exactly one selection mode is used:
/// * `--sample N` feeds every line to a `Smooth` sampler of size `N`;
/// * `--for by,from,to` writes the lines whose number is `from`, `from + by`,
///   ... while the line number is at most `to`;
/// * `--range` writes the lines whose number is contained in the ranges.
///
/// When none is given, sampling is used with the screen height as the size.
/// `--header` sets the header checking mode.
///
/// # Errors
/// Fails when more than one of `--sample`, `--range`, `--for` is given, and
/// propagates argument parsing and I/O errors.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Sample lines from files.", args::FileCount::Many);
    const A: [ArgSpec; 4] = [
        arg_enum! {"header", "h", "Mode", "header requirements", &HEADER_MODE},
        arg! {"for", "f", "by,from,to", "for i=from; i<=to; i+= by"},
        arg! {"sample", "s", "Number", "Select this number of lines, more or less evenly spaced."},
        arg! {"range", "r", "Ranges", "e.g. 1-5,42,95-106."},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut checker = HeaderChecker::new();
    let mut floop = For::default();
    let mut ranges = Ranges::default();
    let mut sample = 10;
    let mut saw_sample = false;
    let mut saw_for = false;
    let mut saw_range = false;
    for x in args {
        // The three selection modes are mutually exclusive.
        let already_chosen = saw_for || saw_sample || saw_range;
        match &*x.name {
            "header" => {
                checker.mode = HeaderMode::from_str(&x.value)?;
            }
            "for" => {
                if already_chosen {
                    return err!("No more that one --sample, --range or --for allowed");
                }
                floop = For::new(&x.value)?;
                saw_for = true;
            }
            "range" => {
                if already_chosen {
                    return err!("No more that one --sample, --range or --for allowed");
                }
                saw_range = true;
                for part in x.value.split(',') {
                    ranges.push(part)?;
                }
            }
            "sample" => {
                if already_chosen {
                    return err!("No more that one --sample, --range or --for allowed");
                }
                sample = x.value.to_usize_whole(x.value.as_bytes(), "sample size")?;
                saw_sample = true;
            }
            _ => unreachable!(),
        }
    }
    if !saw_for && !saw_sample && !saw_range {
        // Default: sample as many lines as fit on the screen.
        saw_sample = true;
        sample = Rect::from_screen().height;
    }
    let mut w = get_writer("-")?;
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        if checker.check_file(&f, x)? {
            w.write_all(f.header().line.as_bytes())?;
        }
        if f.is_done() {
            // This file has no data lines, but the remaining files must
            // still be processed -- so skip it rather than returning.
            continue;
        }
        f.do_split(false);
        if saw_sample {
            let mut s = Smooth::new(sample);
            loop {
                s.add(f.curr().line());
                if f.getline()? {
                    break;
                }
            }
            s.finalize(&mut w.0)?;
        } else if saw_for {
            let mut next = floop.from;
            while f.line_number() <= floop.to {
                if f.line_number() == next {
                    w.write_all(f.curr_line().line())?;
                    next += floop.by;
                }
                if f.getline()? {
                    break;
                }
            }
        } else {
            let max = ranges.max();
            while f.line_number() <= max {
                if ranges.contains(f.line_number()) {
                    w.write_all(f.curr_line().line())?;
                }
                if f.getline()? {
                    break;
                }
            }
        }
    }
    Ok(())
}
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
/// Command entry point: collapse runs of adjacent lines that compare equal
/// under the `--key` comparison into a single output line.
///
/// Depending on the options, the surviving line may carry a count column
/// (`--count`), be chosen by `--which`, or have columns merged by the
/// aggregators given with `--agg`, `--agg-pre` and `--agg-post`.
/// Exactly one input file is processed.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
let prog = args::ProgSpec::new("Select uniq lines.", args::FileCount::One);
const A: [ArgSpec; 7] = [
arg! {"agg", "a", "Col,Spec", "Merge value from this column, in place."},
arg! {"agg-pre", "", "NewCol,SrcCol,Spec", "Merge value from SrcCol into new column, before other columns."},
arg! {"agg-post", "", "NewCol,SrcCol,Spec", "Merge value from SrcCol into new column, after other columns."},
arg! {"key", "k", "Spec", "How to compare adjacent lines"},
arg! {"count", "c", "ColName,Position", "Write the count of matching line."},
arg! {"which", "w", "(First,Last,Min,Max)[,LineCompare]", "Which of the matching lines should be printed."},
arg! {"agg-help", "", "", "Print help for aggregators"},
];
let (args, files) = args::parse(&prog, &A, argv, settings)?;
let mut agg = LineAggList::new();
let mut comp = LineCompList::new();
let mut count = Count::default();
// NOTE(review): "agg-help" is declared above but has no branch below, so it
// would reach unreachable!() unless args::parse consumes it itself -- confirm.
for x in args {
if x.name == "key" {
comp.add(&x.value)?;
} else if x.name == "count" {
count.get_count(&x.value)?;
} else if x.name == "which" {
count.get_which(&x.value)?;
} else if x.name == "agg" {
agg.push_replace(&x.value)?;
} else if x.name == "agg-post" {
agg.push_append(&x.value)?;
} else if x.name == "agg-pre" {
agg.push_prefix(&x.value)?;
} else {
unreachable!();
}
}
assert_eq!(files.len(), 1);
let mut f = Reader::new();
f.open(&files[0])?;
if f.is_empty() {
return Ok(());
}
comp.lookup(&f.names())?;
count.lookup(&f.names())?;
let mut c_write = Writer::new(f.delim());
if !agg.is_empty() {
// With aggregators in play, the count is itself expressed as an
// aggregator placed at the requested end of the line.
if count.pos == CountPos::Begin {
agg.push_first_prefix(&format!("{},1,count", count.name))?;
}
if count.pos == CountPos::End {
agg.push_append(&format!("{},1,count", count.name))?;
}
agg.lookup(&f.names())?;
agg.fill(&mut c_write, f.header());
c_write.lookup(&f.names())?;
}
let mut w = get_writer("-")?;
if f.has_header() {
// Build the output header: input columns plus the count column (no
// aggregators), or whatever the column writer produces (aggregators).
let mut ch = ColumnHeader::new();
if agg.is_empty() {
if count.pos == CountPos::Begin {
ch.push(&count.name)?;
}
ch.push_all(f.header())?;
if count.pos == CountPos::End {
ch.push(&count.name)?;
}
} else {
c_write.add_names(&mut ch, f.header())?;
}
w.write_all(ch.get_head(f.delim()).as_bytes())?;
}
if f.is_done() {
return Ok(());
}
f.do_split(comp.need_split());
// Number of lines seen so far in the current run of equal lines.
let mut matches = 1;
if !agg.is_empty() {
// Aggregating: `tmp` is the line that will be written for the current
// run; the aggregators accumulate over every line of the run.
agg.add(f.curr_line());
let mut tmp = f.curr_line().clone();
loop {
if f.getline()? {
// getline() returned true: flush the last run and stop.
c_write.write(&mut w.0, &tmp)?;
break;
}
if comp.equal_cols(f.prev_line(1), f.curr_line()) {
count.assign(&mut tmp, f.curr_line());
agg.add(f.curr_line());
} else {
c_write.write(&mut w.0, &tmp)?;
tmp.assign(f.curr_line());
agg.reset();
agg.add(f.curr_line());
}
}
} else if count.which == Which::Last {
// Keep the last line of each run: it is always the reader's previous
// line at the moment the run ends.
loop {
if f.getline()? {
count.write(&mut w.0, matches, f.prev_line(1).line(), f.delim())?;
break;
}
if comp.equal_cols(f.prev_line(1), f.curr_line()) {
matches += 1;
} else {
count.write(&mut w.0, matches, f.prev_line(1).line(), f.delim())?;
matches = 1;
}
}
} else if count.which == Which::First && count.is_plain() {
// Plain "first of each run": write a line as soon as it starts a new
// run; no buffering or counting needed.
f.write_curr(&mut w.0)?;
loop {
if f.getline()? {
break;
}
if !comp.equal_cols(f.prev_line(1), f.curr_line()) {
f.write_curr(&mut w.0)?;
}
}
} else {
// General case: `tmp` holds the line chosen for the current run and
// count.assign() may update it from each further line of the run.
let mut tmp = f.curr_line().clone();
loop {
if f.getline()? {
count.write(&mut w.0, matches, tmp.line(), f.delim())?;
break;
}
if comp.equal_cols(f.prev_line(1), f.curr_line()) {
count.assign(&mut tmp, f.curr_line());
matches += 1;
} else {
count.write(&mut w.0, matches, tmp.line(), f.delim())?;
tmp.assign(f.curr_line());
matches = 1;
}
}
}
Ok(())
}
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
/// Command entry point: concatenate the input files to standard output.
///
/// Two paths exist:
/// * the *slow* path, taken when lines must be numbered (`--number`,
///   `--begin`, `--end`), left unnumbered (`--skip`) or dropped (`--remove`),
///   reads each file line by line through a `Reader`;
/// * the *fast* path copies the bytes of each file in 16 KiB chunks after
///   checking its first line with the header checker.
///
/// On the fast path `--pad` controls whether a missing final newline is
/// added after every file (`Yes`), only after the last one (`End`) or never
/// (`No`).
///
/// # Errors
/// Propagates argument parsing, matcher construction and I/O errors.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    /*
    let mut ws = libc::winsize{ws_row:0, ws_col:0, ws_xpixel:0, ws_ypixel:0};
    if unsafe{libc::ioctl(1, libc::TIOCGWINSZ, &mut ws)} >= 0 {
    eprintln!("Window size {} {}", ws.ws_row, ws.ws_col);
    }
    else {
    eprintln!("ioctl failed");
    }
    */
    let prog = args::ProgSpec::new("Concatenate files.", args::FileCount::Many);
    const A: [ArgSpec; 6] = [
        arg_enum! {"pad", "p", "Mode", "Add trailing newline if absent.", &["Yes","No","End"]},
        arg! {"remove", "r", "Matcher", "Remove these lines."},
        arg! {"skip", "s", "Matcher", "Do not number these lines."},
        arg! {"number", "n", "Name,Start,Where", "Number the lines in column 'Name', starting at 'Start', 'Where' can be 'begin' or 'end'"},
        arg! {"begin", "b", "", "Shortcut for --number number,1,begin"},
        arg! {"end", "e", "", "Shortcut for --number number,1,end"},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut pad = PadMode::All;
    let mut num = LineNumber::new();
    let mut skips = MatcherList::new();
    let mut removes = MatcherList::new();
    for x in args {
        match &*x.name {
            "pad" => {
                pad = if x.value.eq_ignore_ascii_case("yes") {
                    PadMode::All
                } else if x.value.eq_ignore_ascii_case("no") {
                    PadMode::None
                } else if x.value.eq_ignore_ascii_case("end") {
                    PadMode::End
                } else {
                    unreachable!()
                };
            }
            "number" => {
                num.set(&x.value)?;
            }
            "begin" => {
                num.set("number,1,begin")?;
            }
            "end" => {
                num.set("number,1,end")?;
            }
            "skip" => {
                skips.push(&x.value)?;
            }
            "remove" => {
                removes.push(&x.value)?;
            }
            _ => unreachable!(),
        }
    }
    let mut header = ColumnHeader::new();
    let mut w = get_writer("-")?;
    let slow = num.do_it || !skips.is_empty() || !removes.is_empty();
    if slow {
        // `v` writes a numbered line; `not_v` writes the same layout with an
        // empty value where the number would go (used for skipped lines).
        let mut v = Writer::new(b'\t');
        if num.do_it && !num.end {
            v.push(Box::new(ColumnCount::new(num.start, &num.name)));
        }
        v.push(Box::new(ColumnWhole));
        if num.do_it && num.end {
            v.push(Box::new(ColumnCount::new(num.start, &num.name)));
        }
        let mut not_v = Writer::new(b'\t');
        if num.do_it && !num.end {
            not_v.push(Box::new(ColumnLiteral::new(b"", "unused")));
        }
        not_v.push(Box::new(ColumnWhole));
        if num.do_it && num.end {
            not_v.push(Box::new(ColumnLiteral::new(b"", "unused")));
        }
        let mut not_header = String::new();
        for x in &files {
            let mut f = Reader::new_open(x)?;
            if f.is_empty() {
                continue;
            }
            v.lookup(&f.names())?;
            header.clear();
            not_header.clear();
            v.add_names(&mut header, f.header())?;
            if f.has_header() {
                not_header = header.get_head(b'\t');
            }
            if settings.checker.check(not_header.as_bytes(), x)? {
                w.write_all(not_header.as_bytes())?;
            }
            if f.is_done() {
                // Header-only file: move on to the next file. Returning here
                // would silently drop every file that follows.
                continue;
            }
            f.do_split(false);
            loop {
                if !removes.umatch(f.curr_nl()) {
                    if skips.umatch(f.curr_nl()) {
                        not_v.write(&mut w.0, f.curr())?;
                    } else {
                        v.write(&mut w.0, f.curr())?;
                    }
                }
                if f.getline()? {
                    break;
                }
            }
        }
    } else {
        const SIZE: usize = 16 * 1024;
        let mut buffer = [0u8; SIZE];
        let mut first_line = Vec::new();
        // True when the last byte written so far was a newline (or nothing
        // has been written yet).
        let mut last_was_cr = true;
        for x in files {
            let mut f = get_reader(&x)?;
            first_line.clear();
            let n = f.read_until(b'\n', &mut first_line)?;
            if n == 0 {
                continue;
            }
            if settings.checker.check(&first_line, &x)? {
                w.write_all(&first_line)?;
                last_was_cr = first_line.last().unwrap() == &b'\n';
            }
            loop {
                let n = f.read(&mut buffer[..])?;
                if n == 0 {
                    break;
                }
                w.write_all(&buffer[..n])?;
                // Track the last byte actually written -- the end of this
                // chunk, not the end of the first line.
                last_was_cr = buffer[n - 1] == b'\n';
            }
            if pad == PadMode::All && !last_was_cr {
                w.write_all(b"\n")?;
                last_was_cr = true;
            }
        }
        if pad == PadMode::End && !last_was_cr {
            w.write_all(b"\n")?;
        }
    }
    Ok(())
}
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
/// Entry point for the "Aggregate info on whole lines." command.
///
/// Parses `argv`, builds the list of aggregators and then runs in one of two modes:
///
/// * `--columns`: each distinct column name (across all files) gets its own deep
///   clone of the aggregator list; one result row per column is written after
///   all files have been read.
/// * otherwise: the aggregators are fed whole lines and one result row is
///   written per input file, after which the aggregators are reset.
///
/// If totals are enabled, a final row holding the per-aggregator sums is written.
///
/// # Errors
/// Propagates errors from argument parsing, aggregator specs, opening/reading
/// input files and writing the output.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Aggregate info on whole lines.", args::FileCount::Many);
    const A: [ArgSpec; 10] = [
        arg! {"agg", "a", "NewCol,Spec", "Merge values into new column."},
        arg! {"lines", "l", "", "Shortcut for '--agg lines,count'"},
        arg! {"bytes", "b", "", "Shortcut for '--agg bytes,asum,chars'"},
        arg! {"chars", "c", "", "Shortcut for '--agg chars,asum,utf8.chars'"},
        arg! {"words", "w", "", "Shortcut for '--agg words,asum,swords'"},
        arg! {"file", "f", "Tri,ColName", "Should we add the filename as the first column?"},
        arg! {"with-header", "h", "Tri", "Should we write a cdx header?"},
        arg! {"total", "t", "yes,no,maybe,only", "Should we write the totals line?"},
        arg! {"format", "F", "plain,float,power2,power10", "Format for output numbers."},
        arg! {"columns", "C", "", "Count each column separately."},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut agg = AggList::new();
    let mut file_name_col = "file".to_string();
    let mut show_file_name = Tri::Maybe;
    let mut show_header = Tri::Maybe;
    let mut show_totals = Tri::Maybe;
    let mut total_only = false;
    let mut do_columns = false;
    let mut fmt = NumFormat::default();
    for x in args {
        if x.name == "agg" {
            agg.push(&x.value)?;
        } else if x.name == "lines" {
            agg.push("lines,count")?;
        } else if x.name == "bytes" {
            agg.push("bytes,asum,chars")?;
        } else if x.name == "chars" {
            agg.push("chars,asum,utf8.chars")?;
        } else if x.name == "words" {
            agg.push("words,asum,swords")?;
        } else if x.name == "format" {
            fmt = NumFormat::new(&x.value)?;
        } else if x.name == "columns" {
            do_columns = true;
        } else if x.name == "total" {
            // "only" is not a Tri value: it forces totals and suppresses the per-row output.
            if x.value.eq_ignore_ascii_case("only") {
                total_only = true;
            } else {
                show_totals = Tri::new(&x.value)?;
            }
        } else if x.name == "with-header" {
            show_header = Tri::new(&x.value)?;
        } else if x.name == "file" {
            // Accepts either "Tri" or "Tri,ColName".
            if let Some((a, b)) = x.value.split_once(',') {
                show_file_name = Tri::new(a)?;
                file_name_col = b.to_string();
            } else {
                show_file_name = Tri::new(&x.value)?;
            }
        } else {
            unreachable!();
        }
    }
    let nada = TextLine::new();
    let mut w = get_writer("-")?;
    let mut first_file = true;
    let mut totals = Vec::new();
    let do_totals;
    let show_file;
    agg.fmt(fmt);
    if do_columns {
        // Default aggregation in column mode.
        if agg.is_empty() {
            agg.push("bytes,asum,chars")?;
        }
        totals.resize(agg.len(), 0.0);
        let mut aggs: Vec<NamedAgg> = Vec::new();
        // colmap[i] is the index into `aggs` for column i of the current file.
        let mut colmap: Vec<usize> = Vec::new();
        for x in &files {
            let mut f = Reader::new();
            f.open(x)?;
            if f.is_empty() {
                continue;
            }
            colmap.clear();
            for name in &f.names() {
                if let Some(pos) = aggs.iter().position(|named| named.name == *name) {
                    colmap.push(pos);
                } else {
                    colmap.push(aggs.len());
                    aggs.push(NamedAgg::new(name, agg.deep_clone()));
                }
            }
            if f.is_done() {
                // No data lines in this file; move on to the next one rather
                // than abandoning the remaining inputs.
                continue;
            }
            loop {
                for (i, x) in f.curr_line().iter().enumerate() {
                    aggs[colmap[i]].agg.add(x);
                }
                if f.getline()? {
                    break;
                }
            }
        }
        show_file = match show_file_name {
            Tri::Yes => true,
            Tri::No => false,
            Tri::Maybe => aggs.len() > 1 && !total_only,
        };
        do_totals = match show_totals {
            Tri::Yes => true,
            Tri::No => false,
            Tri::Maybe => aggs.len() > 1,
        } || total_only;
        let do_header = match show_header {
            Tri::Yes => true,
            Tri::No => false,
            Tri::Maybe => agg.len() > 1 || show_file,
        };
        if do_header {
            let mut ch = ColumnHeader::new();
            if show_file_name != Tri::No {
                ch.push("column")?;
            }
            let mut c_write = Writer::new(b'\t');
            agg.fill(&mut c_write);
            c_write.add_names(&mut ch, &StringLine::new())?;
            w.write_all(ch.get_head(b'\t').as_bytes())?;
        }
        if !total_only {
            for x in &aggs {
                if show_file_name != Tri::No {
                    w.write_all(x.name.as_bytes())?;
                    w.write_all(b"\t")?;
                }
                for i in 0..x.agg.len() {
                    if i != 0 {
                        w.write_all(b"\t")?;
                    }
                    x.agg.get(i).agg.borrow_mut().result(&mut w.0, fmt)?;
                }
                w.write_all(b"\n")?;
            }
        }
        if do_totals {
            for x in &aggs {
                #[allow(clippy::needless_range_loop)]
                for i in 0..agg.len() {
                    totals[i] += x.agg.get(i).agg.borrow().value();
                }
            }
        }
    } else {
        // Default aggregation in whole-line mode.
        if agg.is_empty() {
            agg.push("lines,count")?;
        }
        totals.resize(agg.len(), 0.0);
        show_file = match show_file_name {
            Tri::Yes => true,
            Tri::No => false,
            Tri::Maybe => files.len() > 1 && !total_only,
        };
        do_totals = match show_totals {
            Tri::Yes => true,
            Tri::No => false,
            Tri::Maybe => files.len() > 1,
        } || total_only;
        let do_header = match show_header {
            Tri::Yes => true,
            Tri::No => false,
            Tri::Maybe => agg.len() > 1 || show_file,
        };
        for x in &files {
            let mut f = Reader::new();
            f.open(x)?;
            if f.is_empty() {
                continue;
            }
            let mut c_write = Writer::new(f.delim());
            agg.fill(&mut c_write);
            c_write.lookup(&f.names())?;
            // The header is written once, using the first non-empty file.
            if do_header && first_file {
                first_file = false;
                let mut ch = ColumnHeader::new();
                if show_file {
                    ch.push(&file_name_col)?;
                }
                c_write.add_names(&mut ch, f.header())?;
                w.write_all(ch.get_head(f.delim()).as_bytes())?;
            }
            if f.is_done() {
                continue;
            }
            f.do_split(false);
            loop {
                agg.add(f.curr_line().line());
                if f.getline()? {
                    break;
                }
            }
            if !total_only {
                if show_file {
                    w.write_all(x.as_bytes())?;
                    w.write_all(b"\t")?;
                }
                c_write.write(&mut w.0, &nada)?;
            }
            #[allow(clippy::needless_range_loop)]
            for i in 0..agg.len() {
                totals[i] += agg.get(i).agg.borrow().value();
            }
            // Start the next file from a clean slate.
            agg.reset();
        }
    }
    if do_totals {
        if show_file {
            w.write_all(b"totals\t")?;
        }
        for (i, t) in totals.iter().enumerate() {
            if i != 0 {
                w.write_all(b"\t")?;
            }
            fmt.print(*t, &mut w.0)?;
        }
        w.write_all(b"\n")?;
    }
    Ok(())
}
pub fn new_open(name: &str) -> Result<Self>
Make a new Reader from the file `name`.
Examples found in repository
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
/// Merge the files in `in_files` into `w`, picking the next line with a binary heap.
///
/// The heap holds indices into the list of open readers and orders them via
/// `MergeContext::compare`. With `unique` set, a line is written only when
/// `MergeContext::equal` reports it differs from the last line written.
/// A single input without `unique` is copied straight through.
pub fn merge_t2(
    &self,
    in_files: &[String],
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
    _tmp: &TempDir,
) -> Result<()> {
    match in_files {
        [] => return Ok(()),
        [only] if !unique => {
            let r = get_reader(only)?;
            return copy(r.0, w);
        }
        _ => {}
    }
    let mc = Rc::new(RefCell::new(MergeContext {
        open: Vec::with_capacity(in_files.len()),
        cmp,
    }));
    let mut heap = BinaryHeap::new_by(|a: &usize, b: &usize| mc.borrow_mut().compare(*a, *b));
    {
        // Scope the mutable borrow: the heap comparator needs it again below.
        let mut ctx = mc.borrow_mut();
        for name in in_files {
            ctx.open.push(Reader::new_open(name)?);
        }
        if !ctx.cmp.need_split() {
            for reader in &mut ctx.open {
                reader.do_split(false);
            }
        }
        // FIXME -- Check Header
        if ctx.open[0].has_header() {
            w.write_all(ctx.open[0].header().line.as_bytes())?;
        }
    }
    // Seed the heap with every reader that still has a line to offer.
    for idx in 0..in_files.len() {
        if !mc.borrow().open[idx].is_done() {
            heap.push(idx);
        }
    }
    if unique {
        let first = match heap.pop() {
            Some(idx) => idx,
            None => return Ok(()),
        };
        let mut prev = mc.borrow().open[first].curr_line().clone();
        let exhausted = mc.borrow_mut().open[first].getline()?;
        if !exhausted {
            heap.push(first);
        }
        w.write_all(prev.line())?;
        while let Some(idx) = heap.pop() {
            let same = mc.borrow_mut().equal(&prev, idx);
            if !same {
                let ctx = mc.borrow();
                let line = ctx.open[idx].curr_line();
                w.write_all(line.line())?;
                prev.assign(line);
            }
            let exhausted = mc.borrow_mut().open[idx].getline()?;
            if !exhausted {
                heap.push(idx);
            }
        }
    } else {
        while let Some(idx) = heap.pop() {
            w.write_all(mc.borrow().open[idx].curr_line().line())?;
            let exhausted = mc.borrow_mut().open[idx].getline()?;
            if !exhausted {
                heap.push(idx);
            }
        }
    }
    Ok(())
}
/// Merge all of `in_files` into `w`, selecting lines through a `MergeTreeItem` tree.
///
/// A single input without `unique` is copied straight through. With `unique`
/// set, a line is written only when `cmp.equal_cols` says it differs from the
/// line seen just before it. `_tmp` is currently unused.
pub fn merge_t1(
    &self,
    in_files: &[String],
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
    _tmp: &TempDir,
) -> Result<()> {
    match in_files {
        [] => return Ok(()),
        [only] if !unique => {
            let r = get_reader(only)?;
            return copy(r.0, w);
        }
        _ => {}
    }
    let mut open_files = in_files
        .iter()
        .map(|name| Reader::new_open(name))
        .collect::<Result<Vec<Reader>>>()?;
    if !cmp.need_split() {
        open_files.iter_mut().for_each(|reader| {
            reader.do_split(false);
        });
    }
    // FIXME -- Check Header
    if open_files[0].has_header() {
        w.write_all(open_files[0].header().line.as_bytes())?;
    }
    let nums: Vec<usize> = (0..open_files.len()).collect();
    let mut mm = MergeTreeItem::new_tree(&open_files, &nums);
    if unique {
        let first = match mm.next(cmp, &mut open_files)? {
            Some(idx) => idx,
            None => return Ok(()),
        };
        w.write_all(open_files[first].curr_line().line())?;
        let mut prev = open_files[first].curr_line().clone();
        while let Some(idx) = mm.next(cmp, &mut open_files)? {
            let current = open_files[idx].curr_line();
            if !cmp.equal_cols(&prev, current) {
                w.write_all(current.line())?;
            }
            prev.assign(current);
        }
    } else {
        while let Some(idx) = mm.next(cmp, &mut open_files)? {
            w.write_all(open_files[idx].curr_line().line())?;
        }
    }
    Ok(())
}
More examples
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
/// Entry point for the pattern-selection command.
///
/// Every line of every input file is tested against the list of `--pattern`
/// matchers (combined with AND, or OR when `--or` is given). Lines whose test
/// result, XOR-ed with `--invert`, is true are written to the output, prefixed
/// by any requested `--location` columns.
/// `--show-const` and `--show-func` print their listing and return immediately.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Select uniq lines.", args::FileCount::Many);
    const A: [ArgSpec; 6] = [
        arg! {"pattern", "p", "Col,Spec,Pattern", "Select line where this col matches this pattern."},
        arg! {"show-const", "", "", "Print available constants"},
        arg! {"show-func", "", "", "Print available functions"},
        arg! {"or", "o", "", "A line matches if any of the matchers matches."},
        arg! {"invert", "v", "", "Print lines that don't match."},
        arg! {"location", "l", "name:what", "prefix extra columns of location context."},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut list = LineMatcherList::new_with(Combiner::And);
    let mut reverse = false;
    let mut loc = FileLocList::new();
    for opt in args {
        match &*opt.name {
            "pattern" => {
                list.push(&opt.value)?;
            }
            "or" => {
                list.multi = Combiner::Or;
            }
            "invert" => {
                reverse = true;
            }
            "location" => {
                loc.push(&opt.value)?;
            }
            "show-const" => {
                expr::show_const();
                return Ok(());
            }
            "show-func" => {
                expr::show_func();
                return Ok(());
            }
            _ => unreachable!(),
        }
    }
    let mut w = get_writer("-")?;
    for file in &files {
        let mut f = Reader::new_open(file)?;
        if f.is_empty() {
            continue;
        }
        list.lookup(&f.names())?;
        // Output header = location columns followed by the input's own columns.
        let mut header = ColumnHeader::new();
        loc.add(&mut header)?;
        header.push_all(f.header())?;
        let head_text = if f.has_header() {
            header.get_head(b'\t')
        } else {
            String::new()
        };
        if settings.checker.check(head_text.as_bytes(), file)? {
            w.write_all(head_text.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }
        loop {
            let selected = list.ok(f.curr_line()) ^ reverse;
            if selected {
                // write previous lines of context if necessary
                loc.write_data(&mut w.0, b'\t', f.loc())?;
                f.write_curr(&mut w.0)?;
            }
            // otherwise: write more lines of context if necessary
            if f.getline()? {
                break;
            }
        }
    }
    Ok(())
}
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
/// Entry point for the "Sort lines." command.
///
/// Depending on the flags this either checks that each input is sorted
/// (`--check` / `--Check N`), merges already sorted inputs (`--merge`), or
/// sorts the inputs. `--check` together with `--merge` is rejected. When no
/// `--key` is given, an empty key spec is added as the comparison.
///
/// In check mode, a `CdxError::Silent` error is returned if any failure was reported.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Sort lines.", args::FileCount::Many);
    const A: [ArgSpec; 7] = [
        arg! {"key", "k", "Spec", "How to compare adjacent lines"},
        arg! {"unique", "u", "", "Print only first of equal lines"},
        arg! {"merge", "m", "", "Merge already sorted files."},
        arg! {"check", "c", "", "Check to see if each input file is sorted."},
        arg! {"Check", "C", "Number", "Check to see if each input file is sorted. Report this many failures before exiting."},
        arg! {"alt-sort", "a", "", "Use alternate sort algorithm"},
        arg! {"alt-merge", "A", "", "Use alternate merge algorithm"},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut unique = false;
    let mut merge = false;
    let mut comp = LineCompList::new();
    let mut check = false;
    let mut num_checks = 1;
    let mut config = SortConfig::default();
    for opt in args {
        match &*opt.name {
            "key" => {
                comp.add(&opt.value)?;
            }
            "alt-merge" => {
                config.alt_merge = true;
            }
            "alt-sort" => {
                config.alt_sort = true;
            }
            "merge" => {
                merge = true;
            }
            "check" => {
                check = true;
                num_checks = 1;
            }
            "Check" => {
                check = true;
                num_checks = opt
                    .value
                    .to_usize_whole(opt.value.as_bytes(), "number of reports")?;
            }
            "unique" => {
                unique = true;
            }
            _ => unreachable!(),
        }
    }
    if check && merge {
        return err!("Check and Merge make no sense together");
    }
    if comp.is_empty() {
        comp.add("")?;
    }
    if !check {
        let mut w = get_writer("-")?;
        if merge {
            config.merge(&files, &mut comp, &mut w.0, unique)?;
        } else {
            config.sort(&files, comp, &mut w.0, unique)?;
        }
        return Ok(());
    }
    let mut reported = 0;
    for file in &files {
        let mut f = Reader::new_open(file)?;
        if f.is_done() {
            continue;
        }
        // Advance first: comp_check is only called once a further line has been read.
        while !f.getline()? {
            if comp_check(&f, &mut comp, unique) {
                reported += 1;
                // NOTE(review): this only leaves the current file's loop; the
                // remaining files are still opened and checked — confirm intended.
                if reported >= num_checks {
                    break;
                }
            }
        }
    }
    if reported > 0 {
        return cdx_err(CdxError::Silent);
    }
    Ok(())
}
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
/// Set up and run a join as described by `config`.
///
/// Opens every input file, creates the optional writers for unmatched lines,
/// builds and resolves the key comparators, works out the output columns,
/// writes the header (when the first input has one) and finally dispatches to
/// `join_quick`. Any join type other than `JoinType::Quick` is an error.
fn join(&mut self, config: &JoinConfig) -> Result<()> {
    let num_inputs = config.infiles.len();
    if num_inputs < 2 {
        return err!(
            "Join requires at least two input files, {} found",
            num_inputs
        );
    }
    for name in &config.infiles {
        self.r.push(Reader::new_open(name)?);
    }
    // One (initially absent) unmatched-line writer per input file.
    for _ in 0..num_inputs {
        self.no_match.push(None);
    }
    for extra in &config.unmatch_out {
        // File numbers are 1-based on the command line.
        if !(1..=num_inputs).contains(&extra.file_num) {
            return err!(
                "Join had {} input files, but requested non matching lines from file {}",
                num_inputs,
                extra.file_num
            );
        }
        let slot = extra.file_num - 1;
        if self.no_match[slot].is_some() {
            return err!("Multiple uses of --also for file {}", extra.file_num);
        }
        let mut out = get_writer(&extra.file_name)?;
        self.r[slot].write_header(&mut *out)?;
        self.no_match[slot] = Some(out);
    }
    if config.keys.is_empty() {
        self.comp.push(CompMaker::make_line_comp("1")?);
    } else {
        for key in &config.keys {
            self.comp.push(CompMaker::make_line_comp(key)?);
        }
    }
    for (i, reader) in self.r.iter().enumerate() {
        self.comp.lookup_n(&reader.names(), i)?;
    }
    if config.col_specs.is_empty() {
        // Default output: every column of the first file, then the columns of
        // the other files that are not used by the comparators.
        for file in 0..self.r.len() {
            let used = self.comp.used_cols(file);
            for col in 0..self.r[file].names().len() {
                if file == 0 || !used.contains(&col) {
                    self.out_cols.push(OneOutCol::new_plain(file, col));
                }
            }
        }
    } else {
        for spec in &config.col_specs {
            let mut spec = spec.clone();
            if spec.file >= self.r.len() {
                return err!(
                    "{} input files, but file {} referred to as an output column",
                    self.r.len(),
                    spec.file
                );
            }
            spec.cols.lookup(&self.r[spec.file].names())?;
            for col in spec.cols.get_cols() {
                self.out_cols.push(OneOutCol::new(spec.file, col));
            }
        }
    }
    if self.out_cols.is_empty() {
        return err!("No output columns specified");
    }
    if self.r[0].has_header() {
        self.yes_match.write_all(b" CDX")?;
        for col in &self.out_cols {
            self.yes_match.write_all(&[config.out_delim])?;
            col.write_head(&mut *self.yes_match, &self.r)?;
        }
        self.yes_match.0.write_all(b"\n")?;
    }
    if config.jtype != JoinType::Quick {
        return err!("Only quick supported");
    }
    self.join_quick(config)
}
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
/// Entry point for the "Concatenate files." command.
///
/// Two code paths exist:
///
/// * slow path — taken when line numbering, `--skip` or `--remove` is in use.
///   Files are read line by line; removed lines are dropped, skipped lines are
///   written with an empty value in place of the number column, and all other
///   lines are written with the running count. `--pad` is not consulted here.
/// * fast path — the first line goes through the header checker and the rest
///   of each file is copied in 16 KiB chunks. `--pad` controls whether a
///   missing trailing newline is added after every file (`Yes`), only at the
///   very end (`End`), or never (`No`).
///
/// # Errors
/// Propagates errors from argument parsing, opening/reading inputs and writing output.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    /*
        let mut ws = libc::winsize{ws_row:0, ws_col:0, ws_xpixel:0, ws_ypixel:0};
        if unsafe{libc::ioctl(1, libc::TIOCGWINSZ, &mut ws)} >= 0 {
        eprintln!("Window size {} {}", ws.ws_row, ws.ws_col);
        }
        else {
        eprintln!("ioctl failed");
        }
    */
    let prog = args::ProgSpec::new("Concatenate files.", args::FileCount::Many);
    const A: [ArgSpec; 6] = [
        arg_enum! {"pad", "p", "Mode", "Add trailing newline if absent.", &["Yes","No","End"]},
        arg! {"remove", "r", "Matcher", "Remove these lines."},
        arg! {"skip", "s", "Matcher", "Do not number these lines."},
        arg! {"number", "n", "Name,Start,Where", "Number the lines in column 'Name', starting at 'Start', 'Where' can be 'begin' or 'end'"},
        arg! {"begin", "b", "", "Shortcut for --number number,1,begin"},
        arg! {"end", "e", "", "Shortcut for --number number,1,end"},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut pad = PadMode::All;
    let mut num = LineNumber::new();
    let mut skips = MatcherList::new();
    let mut removes = MatcherList::new();
    for x in args {
        if x.name == "pad" {
            // Case-insensitive comparison without allocating a lowered copy.
            if x.value.eq_ignore_ascii_case("yes") {
                pad = PadMode::All;
            } else if x.value.eq_ignore_ascii_case("no") {
                pad = PadMode::None;
            } else if x.value.eq_ignore_ascii_case("end") {
                pad = PadMode::End;
            } else {
                unreachable!();
            }
        } else if x.name == "number" {
            num.set(&x.value)?;
        } else if x.name == "begin" {
            num.set("number,1,begin")?;
        } else if x.name == "end" {
            num.set("number,1,end")?;
        } else if x.name == "skip" {
            skips.push(&x.value)?;
        } else if x.name == "remove" {
            removes.push(&x.value)?;
        } else {
            unreachable!();
        }
    }
    let mut header = ColumnHeader::new();
    let mut w = get_writer("-")?;
    let slow = num.do_it || !skips.is_empty() || !removes.is_empty();
    if slow {
        // `v` writes numbered lines: the count column goes before or after the whole line.
        let mut v = Writer::new(b'\t');
        if num.do_it && !num.end {
            v.push(Box::new(ColumnCount::new(num.start, &num.name)));
        }
        v.push(Box::new(ColumnWhole));
        if num.do_it && num.end {
            v.push(Box::new(ColumnCount::new(num.start, &num.name)));
        }
        // `not_v` writes skipped lines: same layout, with an empty literal
        // standing in for the count column.
        let mut not_v = Writer::new(b'\t');
        if num.do_it && !num.end {
            not_v.push(Box::new(ColumnLiteral::new(b"", "unused")));
        }
        not_v.push(Box::new(ColumnWhole));
        if num.do_it && num.end {
            not_v.push(Box::new(ColumnLiteral::new(b"", "unused")));
        }
        let mut not_header = String::new();
        for x in &files {
            let mut f = Reader::new_open(x)?;
            if f.is_empty() {
                continue;
            }
            v.lookup(&f.names())?;
            header.clear();
            not_header.clear();
            v.add_names(&mut header, f.header())?;
            if f.has_header() {
                not_header = header.get_head(b'\t');
            }
            if settings.checker.check(not_header.as_bytes(), x)? {
                w.write_all(not_header.as_bytes())?;
            }
            if f.is_done() {
                // Header-only file: nothing to copy, but later files must
                // still be concatenated.
                continue;
            }
            f.do_split(false);
            loop {
                if !removes.umatch(f.curr_nl()) {
                    if skips.umatch(f.curr_nl()) {
                        not_v.write(&mut w.0, f.curr())?;
                    } else {
                        v.write(&mut w.0, f.curr())?;
                    }
                }
                if f.getline()? {
                    break;
                }
            }
        }
    } else {
        const SIZE: usize = 16 * 1024;
        let mut buffer = [0u8; SIZE];
        let mut first_line = Vec::new();
        // Tracks whether the last byte written so far was a newline.
        let mut last_was_cr = true;
        for x in files {
            let mut f = get_reader(&x)?;
            first_line.clear();
            let n = f.read_until(b'\n', &mut first_line)?;
            if n == 0 {
                continue;
            }
            if settings.checker.check(&first_line, &x)? {
                w.write_all(&first_line)?;
                last_was_cr = first_line.ends_with(b"\n");
            }
            loop {
                let n = f.read(&mut buffer[..])?;
                if n == 0 {
                    break;
                }
                w.write_all(&buffer[..n])?;
                // Look at the chunk just written, not at the header line.
                last_was_cr = buffer[..n].ends_with(b"\n");
            }
            if pad == PadMode::All && !last_was_cr {
                w.write_all(b"\n")?;
                last_was_cr = true;
            }
        }
        if pad == PadMode::End && !last_was_cr {
            w.write_all(b"\n")?;
        }
    }
    Ok(())
}
pub fn new_open_with(name: &str, lookback: usize) -> Result<Self>
Make a new Reader from the file `name`, with the given `lookback`.
pub fn curr_nl(&self) -> &[u8]
Get the current line's contents, without the trailing newline.
Examples found in repository
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
/// Entry point for the "Evaluate Formatted Expressions." command.
///
/// Each line of each input file is handed to `parse_fmt_expr`, which returns
/// the number format to use from now on together with the expression to
/// evaluate. A successful result is printed on its own line and flushed at
/// once; an evaluation error is reported on stderr and processing continues.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Evaluate Formatted Expressions.", args::FileCount::Many);
    const A: [ArgSpec; 1] = [arg! {"fmt", "f", "Format", "How to format values."}];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut fmt = NumFormat::default();
    for opt in args {
        match &*opt.name {
            "fmt" => {
                fmt = NumFormat::new(&opt.value)?;
            }
            _ => unreachable!(),
        }
    }
    let mut w = get_writer("-")?;
    for file in &files {
        let mut f = Reader::new();
        f.open(file)?;
        if f.is_empty() || f.is_done() {
            continue;
        }
        loop {
            let text = String::from_utf8_lossy(f.curr_nl());
            let (next_fmt, formula) = parse_fmt_expr(fmt, &text);
            // A format chosen on one line stays in effect for the lines after it.
            fmt = next_fmt;
            match calc(formula) {
                Err(e) => eprintln!("{}", e),
                Ok(v) => {
                    fmt.print(v, &mut w.0)?;
                    w.write_all(b"\n")?;
                    w.flush()?;
                }
            }
            if f.getline()? {
                break;
            }
        }
    }
    Ok(())
}
More examples
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
/// Entry point for the side-by-side paste command.
///
/// Opens every input file, concatenates their headers, and then writes one
/// output line per round, made of the current line of each file joined by tabs.
/// What happens when files run out at different times depends on `--end`:
/// `Exact` is an error naming the offending files, `Early` stops at the first
/// exhausted file, and otherwise exhausted files contribute either their last
/// line (`--last`) or the per-column `--default` values.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Join files on a matching column.", args::FileCount::Many);
    const A: [ArgSpec; 6] = [
        arg_enum! {"end", "e", "Mode", "When to stop. Default Exact", &["Exact", "Early", "Late"]},
        arg! {"default", "d", "ScopedValue", "Use this value for short files."},
        arg! {"last", "l", "", "Use value from last line for short files."},
        arg! {"rename", "r", "old.new,...", "Dupicate column named 'old' is renamed 'new'."},
        arg! {"rename-sloppy", "R", "", "Not an error is some renames not used."},
        arg_enum! {"dups", "D", "Mode", "Duplicate Column Handling", &["Fail", "Allow", "Numeric"]},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut end_mode = EndMode::Exact;
    let mut dflt = ScopedValues::new();
    let mut header = ColumnHeader::new();
    let mut use_last = false;
    for opt in args {
        match &*opt.name {
            "end" => {
                end_mode = EndMode::new(&opt.value)?;
            }
            "default" => {
                dflt.add(&opt.value, ',')?;
            }
            "dups" => {
                header.set_handling(DupColHandling::new(&opt.value)?);
            }
            "rename" => {
                header.rename(&opt.value)?;
            }
            "rename-sloppy" => {
                header.rename_sloppy();
            }
            "last" => {
                use_last = true;
            }
            _ => unreachable!(),
        }
    }
    let mut fds: Vec<Reader> = Vec::with_capacity(files.len());
    let mut num_live = 0;
    let mut num_dead = 0;
    let mut do_header = true;
    // rngs[i] is the range of output column indices contributed by file i.
    let mut rngs: Vec<std::ops::Range<usize>> = Vec::new();
    let mut curr_cols = 0;
    for name in &files {
        let mut f = Reader::new();
        f.open(name)?;
        // A single header-less input switches header output off for good.
        do_header &= f.has_header();
        if do_header {
            header.push_all(f.header())?;
        } else {
            header.push_all_unchecked(f.header());
        }
        if f.is_done() {
            num_dead += 1;
        } else {
            num_live += 1;
        }
        let width = f.header().len();
        rngs.push(curr_cols..curr_cols + width);
        curr_cols += width;
        fds.push(f);
    }
    let mut w = get_writer("-")?;
    header.check_rename()?;
    if do_header {
        w.write_all(header.get_head(b'\t').as_bytes())?;
    }
    dflt.lookup(&header.fieldnames())?;
    while num_live > 0 {
        if end_mode == EndMode::Exact && num_dead != 0 && num_live != 0 {
            // Name whichever group is not larger: the finished files, or the
            // ones that still have data.
            let list_finished = num_dead <= num_live;
            let mut s = String::new();
            for (name, reader) in files.iter().zip(fds.iter()) {
                if reader.is_done() == list_finished {
                    if !s.is_empty() {
                        s.push_str(", ");
                    }
                    s.push_str(name);
                }
            }
            s.push_str(if list_finished {
                " ended early"
            } else {
                " still has data"
            });
            return err!("Input files had different lengths. {}", s); // FIXME - which files were short?
        }
        if end_mode == EndMode::Early && num_dead != 0 {
            break;
        }
        for (i, f) in fds.iter_mut().enumerate() {
            if i != 0 {
                w.write_all(b"\t")?;
            }
            if !f.is_done() {
                w.write_all(f.curr_nl())?;
                if f.getline()? {
                    num_dead += 1;
                    num_live -= 1;
                }
            } else if use_last {
                w.write_all(f.prev_nl(1))?;
            } else {
                // Fill this file's columns with their default values.
                for (k, col) in rngs[i].clone().enumerate() {
                    if k != 0 {
                        w.write_all(b"\t")?;
                    }
                    w.write_all(dflt.get(col).as_bytes())?;
                }
            }
        }
        w.write_all(b"\n")?;
    }
    Ok(())
}
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
/// Entry point for the "Concatenate files." command.
///
/// Two code paths exist:
///
/// * slow path — taken when line numbering, `--skip` or `--remove` is in use.
///   Files are read line by line; removed lines are dropped, skipped lines are
///   written with an empty value in place of the number column, and all other
///   lines are written with the running count. `--pad` is not consulted here.
/// * fast path — the first line goes through the header checker and the rest
///   of each file is copied in 16 KiB chunks. `--pad` controls whether a
///   missing trailing newline is added after every file (`Yes`), only at the
///   very end (`End`), or never (`No`).
///
/// # Errors
/// Propagates errors from argument parsing, opening/reading inputs and writing output.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    /*
        let mut ws = libc::winsize{ws_row:0, ws_col:0, ws_xpixel:0, ws_ypixel:0};
        if unsafe{libc::ioctl(1, libc::TIOCGWINSZ, &mut ws)} >= 0 {
        eprintln!("Window size {} {}", ws.ws_row, ws.ws_col);
        }
        else {
        eprintln!("ioctl failed");
        }
    */
    let prog = args::ProgSpec::new("Concatenate files.", args::FileCount::Many);
    const A: [ArgSpec; 6] = [
        arg_enum! {"pad", "p", "Mode", "Add trailing newline if absent.", &["Yes","No","End"]},
        arg! {"remove", "r", "Matcher", "Remove these lines."},
        arg! {"skip", "s", "Matcher", "Do not number these lines."},
        arg! {"number", "n", "Name,Start,Where", "Number the lines in column 'Name', starting at 'Start', 'Where' can be 'begin' or 'end'"},
        arg! {"begin", "b", "", "Shortcut for --number number,1,begin"},
        arg! {"end", "e", "", "Shortcut for --number number,1,end"},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut pad = PadMode::All;
    let mut num = LineNumber::new();
    let mut skips = MatcherList::new();
    let mut removes = MatcherList::new();
    for x in args {
        if x.name == "pad" {
            // Case-insensitive comparison without allocating a lowered copy.
            if x.value.eq_ignore_ascii_case("yes") {
                pad = PadMode::All;
            } else if x.value.eq_ignore_ascii_case("no") {
                pad = PadMode::None;
            } else if x.value.eq_ignore_ascii_case("end") {
                pad = PadMode::End;
            } else {
                unreachable!();
            }
        } else if x.name == "number" {
            num.set(&x.value)?;
        } else if x.name == "begin" {
            num.set("number,1,begin")?;
        } else if x.name == "end" {
            num.set("number,1,end")?;
        } else if x.name == "skip" {
            skips.push(&x.value)?;
        } else if x.name == "remove" {
            removes.push(&x.value)?;
        } else {
            unreachable!();
        }
    }
    let mut header = ColumnHeader::new();
    let mut w = get_writer("-")?;
    let slow = num.do_it || !skips.is_empty() || !removes.is_empty();
    if slow {
        // `v` writes numbered lines: the count column goes before or after the whole line.
        let mut v = Writer::new(b'\t');
        if num.do_it && !num.end {
            v.push(Box::new(ColumnCount::new(num.start, &num.name)));
        }
        v.push(Box::new(ColumnWhole));
        if num.do_it && num.end {
            v.push(Box::new(ColumnCount::new(num.start, &num.name)));
        }
        // `not_v` writes skipped lines: same layout, with an empty literal
        // standing in for the count column.
        let mut not_v = Writer::new(b'\t');
        if num.do_it && !num.end {
            not_v.push(Box::new(ColumnLiteral::new(b"", "unused")));
        }
        not_v.push(Box::new(ColumnWhole));
        if num.do_it && num.end {
            not_v.push(Box::new(ColumnLiteral::new(b"", "unused")));
        }
        let mut not_header = String::new();
        for x in &files {
            let mut f = Reader::new_open(x)?;
            if f.is_empty() {
                continue;
            }
            v.lookup(&f.names())?;
            header.clear();
            not_header.clear();
            v.add_names(&mut header, f.header())?;
            if f.has_header() {
                not_header = header.get_head(b'\t');
            }
            if settings.checker.check(not_header.as_bytes(), x)? {
                w.write_all(not_header.as_bytes())?;
            }
            if f.is_done() {
                // Header-only file: nothing to copy, but later files must
                // still be concatenated.
                continue;
            }
            f.do_split(false);
            loop {
                if !removes.umatch(f.curr_nl()) {
                    if skips.umatch(f.curr_nl()) {
                        not_v.write(&mut w.0, f.curr())?;
                    } else {
                        v.write(&mut w.0, f.curr())?;
                    }
                }
                if f.getline()? {
                    break;
                }
            }
        }
    } else {
        const SIZE: usize = 16 * 1024;
        let mut buffer = [0u8; SIZE];
        let mut first_line = Vec::new();
        // Tracks whether the last byte written so far was a newline.
        let mut last_was_cr = true;
        for x in files {
            let mut f = get_reader(&x)?;
            first_line.clear();
            let n = f.read_until(b'\n', &mut first_line)?;
            if n == 0 {
                continue;
            }
            if settings.checker.check(&first_line, &x)? {
                w.write_all(&first_line)?;
                last_was_cr = first_line.ends_with(b"\n");
            }
            loop {
                let n = f.read(&mut buffer[..])?;
                if n == 0 {
                    break;
                }
                w.write_all(&buffer[..n])?;
                // Look at the chunk just written, not at the header line.
                last_was_cr = buffer[..n].ends_with(b"\n");
            }
            if pad == PadMode::All && !last_was_cr {
                w.write_all(b"\n")?;
                last_was_cr = true;
            }
        }
        if pad == PadMode::End && !last_was_cr {
            w.write_all(b"\n")?;
        }
    }
    Ok(())
}
pub fn prev_nl(&self, n: usize) -> &[u8]
Get a previous line's contents, without the trailing newline.
Examples found in repository
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
/// Entry point for the side-by-side paste command.
///
/// Opens every input file, concatenates their headers, and then writes one
/// output line per round, made of the current line of each file joined by tabs.
/// What happens when files run out at different times depends on `--end`:
/// `Exact` is an error naming the offending files, `Early` stops at the first
/// exhausted file, and otherwise exhausted files contribute either their last
/// line (`--last`) or the per-column `--default` values.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Join files on a matching column.", args::FileCount::Many);
    const A: [ArgSpec; 6] = [
        arg_enum! {"end", "e", "Mode", "When to stop. Default Exact", &["Exact", "Early", "Late"]},
        arg! {"default", "d", "ScopedValue", "Use this value for short files."},
        arg! {"last", "l", "", "Use value from last line for short files."},
        arg! {"rename", "r", "old.new,...", "Dupicate column named 'old' is renamed 'new'."},
        arg! {"rename-sloppy", "R", "", "Not an error is some renames not used."},
        arg_enum! {"dups", "D", "Mode", "Duplicate Column Handling", &["Fail", "Allow", "Numeric"]},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut end_mode = EndMode::Exact;
    let mut dflt = ScopedValues::new();
    let mut header = ColumnHeader::new();
    let mut use_last = false;
    for opt in args {
        match &*opt.name {
            "end" => {
                end_mode = EndMode::new(&opt.value)?;
            }
            "default" => {
                dflt.add(&opt.value, ',')?;
            }
            "dups" => {
                header.set_handling(DupColHandling::new(&opt.value)?);
            }
            "rename" => {
                header.rename(&opt.value)?;
            }
            "rename-sloppy" => {
                header.rename_sloppy();
            }
            "last" => {
                use_last = true;
            }
            _ => unreachable!(),
        }
    }
    let mut fds: Vec<Reader> = Vec::with_capacity(files.len());
    let mut num_live = 0;
    let mut num_dead = 0;
    let mut do_header = true;
    // rngs[i] is the range of output column indices contributed by file i.
    let mut rngs: Vec<std::ops::Range<usize>> = Vec::new();
    let mut curr_cols = 0;
    for name in &files {
        let mut f = Reader::new();
        f.open(name)?;
        // A single header-less input switches header output off for good.
        do_header &= f.has_header();
        if do_header {
            header.push_all(f.header())?;
        } else {
            header.push_all_unchecked(f.header());
        }
        if f.is_done() {
            num_dead += 1;
        } else {
            num_live += 1;
        }
        let width = f.header().len();
        rngs.push(curr_cols..curr_cols + width);
        curr_cols += width;
        fds.push(f);
    }
    let mut w = get_writer("-")?;
    header.check_rename()?;
    if do_header {
        w.write_all(header.get_head(b'\t').as_bytes())?;
    }
    dflt.lookup(&header.fieldnames())?;
    while num_live > 0 {
        if end_mode == EndMode::Exact && num_dead != 0 && num_live != 0 {
            // Name whichever group is not larger: the finished files, or the
            // ones that still have data.
            let list_finished = num_dead <= num_live;
            let mut s = String::new();
            for (name, reader) in files.iter().zip(fds.iter()) {
                if reader.is_done() == list_finished {
                    if !s.is_empty() {
                        s.push_str(", ");
                    }
                    s.push_str(name);
                }
            }
            s.push_str(if list_finished {
                " ended early"
            } else {
                " still has data"
            });
            return err!("Input files had different lengths. {}", s); // FIXME - which files were short?
        }
        if end_mode == EndMode::Early && num_dead != 0 {
            break;
        }
        for (i, f) in fds.iter_mut().enumerate() {
            if i != 0 {
                w.write_all(b"\t")?;
            }
            if !f.is_done() {
                w.write_all(f.curr_nl())?;
                if f.getline()? {
                    num_dead += 1;
                    num_live -= 1;
                }
            } else if use_last {
                w.write_all(f.prev_nl(1))?;
            } else {
                // Fill this file's columns with their default values.
                for (k, col) in rngs[i].clone().enumerate() {
                    if k != 0 {
                        w.write_all(b"\t")?;
                    }
                    w.write_all(dflt.get(col).as_bytes())?;
                }
            }
        }
        w.write_all(b"\n")?;
    }
    Ok(())
}
sourcepub const fn delim(&self) -> u8
pub const fn delim(&self) -> u8
get delimiter
Examples found in repository
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
/// Print the first `screen.height` lines of `file` as an aligned table.
///
/// The header (when the file has one) is centred in its column; data cells
/// are left-aligned and padded. Column widths start at the widest cell seen
/// and are shrunk with `dec_max` until the table fits in `screen.width`,
/// leaving one cell per column for the separating space.
///
/// Output goes to the writer returned by `get_writer("-")`.
///
/// # Errors
/// Propagates any error from opening/reading the file or writing the output.
pub fn show(file: &str, screen: &Rect) -> Result<()> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(());
    }
    let mut lines: Vec<StringLine> = Vec::new();
    let mut sizes: Vec<usize> = Vec::new();
    sizes.resize(f.header().len(), 0);
    if f.has_header() {
        lines.push(f.header().clone());
    }
    if !f.is_done() {
        while lines.len() < screen.height {
            let mut s = StringLine::new();
            s.line = String::from_utf8_lossy(f.curr().line()).to_string();
            s.split(f.delim());
            lines.push(s);
            if f.getline()? {
                break;
            }
        }
    }
    for x in &lines {
        for (i, c) in x.iter().enumerate() {
            // A data row may carry more fields than the header; grow the
            // width table instead of indexing out of bounds.
            if i >= sizes.len() {
                sizes.resize(i + 1, 0);
            }
            let width = UnicodeWidthStr::width(c);
            if sizes[i] < width {
                sizes[i] = width;
            }
        }
    }
    let mut total: usize = sizes.iter().sum();
    // Saturate: with more columns than screen cells the plain subtraction
    // would underflow (panic in debug builds, wrap in release builds).
    let target = screen.width.saturating_sub(sizes.len());
    while total > target {
        dec_max(&mut sizes);
        total = sizes.iter().sum();
    }
    let mut w = get_writer("-")?;
    // Only the first stored line (the header, if any) is centred.
    let mut do_center = f.has_header();
    for x in &lines {
        let mut need_space = false;
        for (c, y) in x.iter().enumerate() {
            let (nstr, width) = y.unicode_truncate(sizes[c]);
            if need_space {
                w.write_all(b" ")?;
            }
            let num = (sizes[c] - width) / 2;
            if do_center {
                for _ in 0..num {
                    w.write_all(b" ")?;
                }
            }
            w.write_all(nstr.as_bytes())?;
            if do_center {
                // Whatever padding was not used on the left goes on the right.
                let num2 = (sizes[c] - width) - num;
                for _ in 0..num2 {
                    w.write_all(b" ")?;
                }
            } else {
                for _ in width..sizes[c] {
                    w.write_all(b" ")?;
                }
            }
            need_space = true;
        }
        do_center = false;
        w.write_all(b"\n")?;
    }
    Ok(())
}
/// Render the first `screen.height` lines of `file` into `w` as an aligned table.
///
/// `w` is cleared and then receives one formatted `String` per line. The
/// header (when the file has one) is centred in its column; data cells are
/// left-aligned and padded. Column widths start at the widest cell seen and
/// are shrunk with `dec_max` until the table fits in `screen.width`.
///
/// Returns the sum of the final column widths plus one per column, or `0`
/// (leaving `w` untouched) when the file is empty.
///
/// # Errors
/// Propagates any error from opening or reading the file.
pub fn show2(file: &str, screen: &Rect, w: &mut Vec<String>) -> Result<usize> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(0);
    }
    let mut lines: Vec<StringLine> = Vec::new();
    let mut sizes: Vec<usize> = Vec::new();
    sizes.resize(f.header().len(), 0);
    if f.has_header() {
        lines.push(f.header().clone());
    }
    if !f.is_done() {
        while lines.len() < screen.height {
            let mut s = StringLine::new();
            s.line = String::from_utf8_lossy(f.curr().line()).to_string();
            s.split(f.delim());
            lines.push(s);
            if f.getline()? {
                break;
            }
        }
    }
    for x in &lines {
        for (i, c) in x.iter().enumerate() {
            // A data row may carry more fields than the header; grow the
            // width table instead of indexing out of bounds.
            if i >= sizes.len() {
                sizes.resize(i + 1, 0);
            }
            let width = UnicodeWidthStr::width(c);
            if sizes[i] < width {
                sizes[i] = width;
            }
        }
    }
    let mut total: usize = sizes.iter().sum();
    // Saturate: with more columns than screen cells the plain subtraction
    // would underflow (panic in debug builds, wrap in release builds).
    let target = screen.width.saturating_sub(sizes.len());
    while total > target {
        dec_max(&mut sizes);
        total = sizes.iter().sum();
    }
    // Only the first stored line (the header, if any) is centred.
    let mut do_center = f.has_header();
    w.clear();
    for x in &lines {
        let mut s = String::new();
        let mut need_space = false;
        for (c, y) in x.iter().enumerate() {
            let (nstr, width) = y.unicode_truncate(sizes[c]);
            if need_space {
                s.push(' ');
            }
            let num = (sizes[c] - width) / 2;
            if do_center {
                for _ in 0..num {
                    s.push(' ');
                }
            }
            s.push_str(nstr);
            if do_center {
                // Whatever padding was not used on the left goes on the right.
                let num2 = (sizes[c] - width) - num;
                for _ in 0..num2 {
                    s.push(' ');
                }
            } else {
                for _ in width..sizes[c] {
                    s.push(' ');
                }
            }
            need_space = true;
        }
        do_center = false;
        w.push(s);
    }
    Ok(sizes.iter().sum::<usize>() + sizes.len())
}
More examples
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
/// Entry point for the command that collapses runs of adjacent matching lines.
///
/// Parses `argv`, opens the single input file, and writes one output line per
/// run of adjacent lines that `comp` considers equal. Depending on the
/// options, the output line is the first or last line of the run, a line
/// chosen through `count.assign`, or a line produced by the aggregators in
/// `agg`; a match count may be included via `count`.
///
/// Returns `Ok(())` without writing anything when the input file is empty.
/// Errors from argument parsing, column lookup, reading and writing are
/// propagated.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
let prog = args::ProgSpec::new("Select uniq lines.", args::FileCount::One);
const A: [ArgSpec; 7] = [
arg! {"agg", "a", "Col,Spec", "Merge value from this column, in place."},
arg! {"agg-pre", "", "NewCol,SrcCol,Spec", "Merge value from SrcCol into new column, before other columns."},
arg! {"agg-post", "", "NewCol,SrcCol,Spec", "Merge value from SrcCol into new column, after other columns."},
arg! {"key", "k", "Spec", "How to compare adjacent lines"},
arg! {"count", "c", "ColName,Position", "Write the count of matching line."},
arg! {"which", "w", "(First,Last,Min,Max)[,LineCompare]", "Which of the matching lines should be printed."},
arg! {"agg-help", "", "", "Print help for aggregators"},
];
let (args, files) = args::parse(&prog, &A, argv, settings)?;
let mut agg = LineAggList::new();
let mut comp = LineCompList::new();
let mut count = Count::default();
// NOTE(review): "agg-help" is declared in `A` but has no branch here, so it
// would fall through to `unreachable!()` unless `args::parse` consumes it —
// confirm.
for x in args {
if x.name == "key" {
comp.add(&x.value)?;
} else if x.name == "count" {
count.get_count(&x.value)?;
} else if x.name == "which" {
count.get_which(&x.value)?;
} else if x.name == "agg" {
agg.push_replace(&x.value)?;
} else if x.name == "agg-post" {
agg.push_append(&x.value)?;
} else if x.name == "agg-pre" {
agg.push_prefix(&x.value)?;
} else {
unreachable!();
}
}
assert_eq!(files.len(), 1);
let mut f = Reader::new();
f.open(&files[0])?;
if f.is_empty() {
return Ok(());
}
comp.lookup(&f.names())?;
count.lookup(&f.names())?;
let mut c_write = Writer::new(f.delim());
// With aggregators present, the match count is itself expressed as a
// "count" aggregator placed before or after the other columns.
if !agg.is_empty() {
if count.pos == CountPos::Begin {
agg.push_first_prefix(&format!("{},1,count", count.name))?;
}
if count.pos == CountPos::End {
agg.push_append(&format!("{},1,count", count.name))?;
}
agg.lookup(&f.names())?;
agg.fill(&mut c_write, f.header());
c_write.lookup(&f.names())?;
}
let mut w = get_writer("-")?;
// Emit the output header: either the input header with the count column
// added at the requested position, or the names supplied by `c_write`.
if f.has_header() {
let mut ch = ColumnHeader::new();
if agg.is_empty() {
if count.pos == CountPos::Begin {
ch.push(&count.name)?;
}
ch.push_all(f.header())?;
if count.pos == CountPos::End {
ch.push(&count.name)?;
}
} else {
c_write.add_names(&mut ch, f.header())?;
}
w.write_all(ch.get_head(f.delim()).as_bytes())?;
}
if f.is_done() {
return Ok(());
}
f.do_split(comp.need_split());
// Number of lines in the current run; the first line is already loaded.
let mut matches = 1;
// In every loop below, `getline()` returning true is treated as end of
// input: the pending run is flushed and the loop exits.
if !agg.is_empty() {
// Aggregating mode: `tmp` holds the representative line of the run.
agg.add(f.curr_line());
let mut tmp = f.curr_line().clone();
loop {
if f.getline()? {
c_write.write(&mut w.0, &tmp)?;
break;
}
if comp.equal_cols(f.prev_line(1), f.curr_line()) {
count.assign(&mut tmp, f.curr_line());
agg.add(f.curr_line());
} else {
// Run ended: flush it and start a new run from the current line.
c_write.write(&mut w.0, &tmp)?;
tmp.assign(f.curr_line());
agg.reset();
agg.add(f.curr_line());
}
}
} else if count.which == Which::Last {
// Keep-last mode: the reader's one-line lookback is the line to print.
loop {
if f.getline()? {
count.write(&mut w.0, matches, f.prev_line(1).line(), f.delim())?;
break;
}
if comp.equal_cols(f.prev_line(1), f.curr_line()) {
matches += 1;
} else {
count.write(&mut w.0, matches, f.prev_line(1).line(), f.delim())?;
matches = 1;
}
}
} else if count.which == Which::First && count.is_plain() {
// Keep-first mode with `count.is_plain()`: write each run's first line
// as soon as it is seen.
f.write_curr(&mut w.0)?;
loop {
if f.getline()? {
break;
}
if !comp.equal_cols(f.prev_line(1), f.curr_line()) {
f.write_curr(&mut w.0)?;
}
}
} else {
// General mode: `count.assign` decides whether the current line replaces
// the remembered line `tmp`; the run is written once it ends.
let mut tmp = f.curr_line().clone();
loop {
if f.getline()? {
count.write(&mut w.0, matches, tmp.line(), f.delim())?;
break;
}
if comp.equal_cols(f.prev_line(1), f.curr_line()) {
count.assign(&mut tmp, f.curr_line());
matches += 1;
} else {
count.write(&mut w.0, matches, tmp.line(), f.delim())?;
tmp.assign(f.curr_line());
matches = 1;
}
}
}
Ok(())
}
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
/// Entry point for the whole-line aggregation command.
///
/// Parses `argv`, runs the configured aggregators over every input file and
/// writes the results to the writer returned by `get_writer("-")`.
///
/// Two modes exist:
/// * `--columns`: each distinct column name (across all files) gets its own
///   deep copy of the aggregator list and one output row.
/// * default: each file gets one output row computed over whole lines.
///
/// In both modes an optional totals row sums the aggregator values.
/// Empty files are skipped. Errors from argument parsing, file access,
/// column lookup and writing are propagated.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Aggregate info on whole lines.", args::FileCount::Many);
    const A: [ArgSpec; 10] = [
        arg! {"agg", "a", "NewCol,Spec", "Merge values into new column."},
        arg! {"lines", "l", "", "Shortcut for '--agg lines,count'"},
        arg! {"bytes", "b", "", "Shortcut for '--agg bytes,asum,chars'"},
        arg! {"chars", "c", "", "Shortcut for '--agg chars,asum,utf8.chars'"},
        arg! {"words", "w", "", "Shortcut for '--agg words,asum,swords'"},
        arg! {"file", "f", "Tri,ColName", "Should we add the filename as the first column?"},
        arg! {"with-header", "h", "Tri", "Should we write a cdx header?"},
        arg! {"total", "t", "yes,no,maybe,only", "Should we write the totals line?"},
        arg! {"format", "F", "plain,float,power2,power10", "Format for output numbers."},
        arg! {"columns", "C", "", "Count each column separately."},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut agg = AggList::new();
    let mut file_name_col = "file".to_string();
    let mut show_file_name = Tri::Maybe;
    let mut show_header = Tri::Maybe;
    let mut show_totals = Tri::Maybe;
    let mut total_only = false;
    let mut do_columns = false;
    let mut fmt = NumFormat::default();
    for x in args {
        if x.name == "agg" {
            agg.push(&x.value)?;
        } else if x.name == "lines" {
            agg.push("lines,count")?;
        } else if x.name == "bytes" {
            agg.push("bytes,asum,chars")?;
        } else if x.name == "chars" {
            agg.push("chars,asum,utf8.chars")?;
        } else if x.name == "words" {
            agg.push("words,asum,swords")?;
        } else if x.name == "format" {
            fmt = NumFormat::new(&x.value)?;
        } else if x.name == "columns" {
            do_columns = true;
        } else if x.name == "total" {
            // "only" is an extra value on top of the usual Tri values.
            if x.value.eq_ignore_ascii_case("only") {
                total_only = true;
            } else {
                show_totals = Tri::new(&x.value)?;
            }
        } else if x.name == "with-header" {
            show_header = Tri::new(&x.value)?;
        } else if x.name == "file" {
            // Accepts "Tri" or "Tri,ColName".
            if let Some((a, b)) = x.value.split_once(',') {
                show_file_name = Tri::new(a)?;
                file_name_col = b.to_string();
            } else {
                show_file_name = Tri::new(&x.value)?;
            }
        } else {
            unreachable!();
        }
    }
    let nada = TextLine::new();
    let mut w = get_writer("-")?;
    let mut first_file = true;
    let mut totals = Vec::new();
    let do_totals;
    let show_file;
    agg.fmt(fmt);
    if do_columns {
        if agg.is_empty() {
            agg.push("bytes,asum,chars")?;
        }
        totals.resize(agg.len(), 0.0);
        let mut aggs: Vec<NamedAgg> = Vec::new();
        // colmap[i] is the index in `aggs` for column i of the current file.
        let mut colmap: Vec<usize> = Vec::new();
        for x in &files {
            let mut f = Reader::new();
            f.open(x)?;
            if f.is_empty() {
                continue;
            }
            colmap.clear();
            for x in &f.names() {
                if let Some(pos) = aggs.iter().position(|agg| agg.name == *x) {
                    colmap.push(pos);
                } else {
                    colmap.push(aggs.len());
                    aggs.push(NamedAgg::new(x, agg.deep_clone()));
                }
            }
            if f.is_done() {
                // A file with no data lines must not stop processing of the
                // remaining files (the per-file mode below also `continue`s).
                continue;
            }
            loop {
                for (i, x) in f.curr_line().iter().enumerate() {
                    aggs[colmap[i]].agg.add(x);
                }
                if f.getline()? {
                    break;
                }
            }
        }
        show_file = match show_file_name {
            Tri::Yes => true,
            Tri::No => false,
            Tri::Maybe => aggs.len() > 1 && !total_only,
        };
        do_totals = match show_totals {
            Tri::Yes => true,
            Tri::No => false,
            Tri::Maybe => aggs.len() > 1,
        } || total_only;
        let do_header = match show_header {
            Tri::Yes => true,
            Tri::No => false,
            Tri::Maybe => agg.len() > 1 || show_file,
        };
        if do_header {
            let mut ch = ColumnHeader::new();
            if show_file_name != Tri::No {
                ch.push("column")?;
            }
            let mut c_write = Writer::new(b'\t');
            agg.fill(&mut c_write);
            c_write.add_names(&mut ch, &StringLine::new())?;
            w.write_all(ch.get_head(b'\t').as_bytes())?;
        }
        if !total_only {
            // One row per distinct column name.
            for x in &aggs {
                if show_file_name != Tri::No {
                    w.write_all(x.name.as_bytes())?;
                    w.write_all(b"\t")?;
                }
                for i in 0..x.agg.len() {
                    if i != 0 {
                        w.write_all(b"\t")?;
                    }
                    x.agg.get(i).agg.borrow_mut().result(&mut w.0, fmt)?;
                }
                w.write_all(b"\n")?;
            }
        }
        if do_totals {
            for x in &aggs {
                #[allow(clippy::needless_range_loop)]
                for i in 0..agg.len() {
                    totals[i] += x.agg.get(i).agg.borrow().value();
                }
            }
        }
    } else {
        if agg.is_empty() {
            agg.push("lines,count")?;
        }
        totals.resize(agg.len(), 0.0);
        show_file = match show_file_name {
            Tri::Yes => true,
            Tri::No => false,
            Tri::Maybe => files.len() > 1 && !total_only,
        };
        do_totals = match show_totals {
            Tri::Yes => true,
            Tri::No => false,
            Tri::Maybe => files.len() > 1,
        } || total_only;
        let do_header = match show_header {
            Tri::Yes => true,
            Tri::No => false,
            Tri::Maybe => agg.len() > 1 || show_file,
        };
        for x in &files {
            let mut f = Reader::new();
            f.open(x)?;
            if f.is_empty() {
                continue;
            }
            let mut c_write = Writer::new(f.delim());
            agg.fill(&mut c_write);
            c_write.lookup(&f.names())?;
            // The header is written once, using the first non-empty file.
            if do_header && first_file {
                first_file = false;
                let mut ch = ColumnHeader::new();
                if show_file {
                    ch.push(&file_name_col)?;
                }
                c_write.add_names(&mut ch, f.header())?;
                w.write_all(ch.get_head(f.delim()).as_bytes())?;
            }
            if f.is_done() {
                continue;
            }
            f.do_split(false);
            loop {
                agg.add(f.curr_line().line());
                if f.getline()? {
                    break;
                }
            }
            if !total_only {
                if show_file {
                    w.write_all(x.as_bytes())?;
                    w.write_all(b"\t")?;
                }
                c_write.write(&mut w.0, &nada)?;
            }
            #[allow(clippy::needless_range_loop)]
            for i in 0..agg.len() {
                totals[i] += agg.get(i).agg.borrow().value();
            }
            // Start the next file from a clean aggregator state.
            agg.reset();
        }
    }
    if do_totals {
        if show_file {
            w.write_all(b"totals\t")?;
        }
        for (i, t) in totals.iter().enumerate() {
            if i != 0 {
                w.write_all(b"\t")?;
            }
            fmt.print(*t, &mut w.0)?;
        }
        w.write_all(b"\n")?;
    }
    Ok(())
}
source
pub fn names(&self) -> Vec<&str>
get column names
Examples found in repository
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
/// Entry point for the column-selection command.
///
/// Builds a `Writer` from the `--fields`, `--group`, `--expr` and
/// `--composite` options (in command-line order), then for every input file
/// writes the selected columns of each line to the writer returned by
/// `get_writer("-")`. The header is written only when `settings.checker`
/// approves it for that file.
///
/// # Errors
/// Fails when no output column option was given, and propagates errors from
/// argument parsing, file access, column lookup and writing.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Select columns", args::FileCount::Many);
    const A: [ArgSpec; 5] = [
        arg! {"fields", "f", "Columns", "the columns to select."},
        arg! {"group", "g", "Columns", "the columns in a bunch, e.g. '.group:1-3'"},
        arg! {"expr", "e", "Name:Expr", "The result of an arithmetic expression"},
        arg! {"composite", "c", "Spec", "new value made from parts. e.g. 'stuff:abc^{two}def'"},
        arg_enum! {"dups", "D", "Mode", "Duplicate Column Handling", &["Fail", "Allow", "Numeric"]},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut header = ColumnHeader::new();
    let mut v = Writer::new(b'\t');
    for x in args {
        if x.name == "dups" {
            header.set_handling(DupColHandling::new(&x.value)?);
        } else if x.name == "fields" {
            v.push(Box::new(ReaderColumns::new(ColumnSet::from_spec(
                &x.value,
            )?)));
        } else if x.name == "group" {
            v.push(Box::new(ColumnClump::from_spec(&x.value)?));
        } else if x.name == "expr" {
            v.push(Box::new(ColumnExpr::new(&x.value)?));
        } else if x.name == "composite" {
            v.push(Box::new(CompositeColumn::new(&x.value)?));
        } else {
            unreachable!();
        }
    }
    if v.is_empty() {
        // Name the options that actually exist in `A` above.
        bail!("cut requires at least one --fields, --group, --expr or --composite");
    }
    let mut w = get_writer("-")?;
    let mut not_header = String::new();
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        v.lookup(&f.names())?;
        header.clear();
        not_header.clear();
        v.add_names(&mut header, f.header())?;
        // `not_header` stays empty for files without a header line.
        if f.has_header() {
            not_header = header.get_head(b'\t');
        }
        if settings.checker.check(not_header.as_bytes(), x)? {
            w.write_all(not_header.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }
        loop {
            v.write(&mut w.0, f.curr())?;
            if f.getline()? {
                break;
            }
        }
    }
    Ok(())
}
More examples
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
/// Merge two already-sorted files into `w`, ordered by `cmp`.
///
/// The header of `left` (if any) is written first. When `unique` is false,
/// lines that compare equal are both written, right before left. When
/// `unique` is true, the main loop writes one line per group of lines that
/// compare equal and skips the following equal lines in both inputs.
///
/// NOTE(review): once one input is exhausted, the remainder of the other is
/// copied verbatim even when `unique` is set — confirm whether duplicates in
/// that tail should be suppressed as well.
///
/// # Errors
/// Propagates errors from opening/reading either file, from column lookup
/// and from writing to `w`.
pub fn merge_2(
    &self,
    left: &str,
    right: &str,
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
) -> Result<()> {
    let mut left_file = Reader::new();
    let mut right_file = Reader::new();
    left_file.open(left)?;
    right_file.open(right)?;
    left_file.do_split(false);
    right_file.do_split(false);
    cmp.lookup(&left_file.names())?;
    // FIXME -- Check Header
    if left_file.has_header() {
        w.write_all(left_file.header().line.as_bytes())?;
    }
    if unique {
        // Raw bytes of the most recently written line.
        let mut prev: Vec<u8> = Vec::new();
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            match ord {
                Ordering::Less => {
                    left_file.write(&mut w)?;
                    mem::swap(&mut prev, left_file.curr_mut().raw());
                    left_file.getline()?;
                }
                Ordering::Greater => {
                    right_file.write(&mut w)?;
                    // The line just written came from the right file, so its
                    // buffer is the one to remember. Swapping with the left
                    // reader here would corrupt the left file's current line.
                    mem::swap(&mut prev, right_file.curr_mut().raw());
                    right_file.getline()?;
                }
                Ordering::Equal => {
                    left_file.write(&mut w)?;
                    mem::swap(&mut prev, left_file.curr_mut().raw());
                    left_file.getline()?;
                    right_file.getline()?;
                }
            }
            // Drop every following line equal to the one just written.
            while !left_file.is_done() && cmp.equal_lines(left_file.curr().line(), &prev) {
                left_file.getline()?;
            }
            while !right_file.is_done() && cmp.equal_lines(right_file.curr().line(), &prev) {
                right_file.getline()?;
            }
        }
    } else {
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            // if Equal, write both lines
            if ord != Ordering::Less {
                right_file.write(&mut w)?;
                right_file.getline()?;
            }
            if ord != Ordering::Greater {
                left_file.write(&mut w)?;
                left_file.getline()?;
            }
        }
    }
    // At most one of these loops runs: copy whatever input is left over.
    while !left_file.is_done() {
        left_file.write(&mut w)?;
        left_file.getline()?;
    }
    while !right_file.is_done() {
        right_file.write(&mut w)?;
        right_file.getline()?;
    }
    Ok(())
}
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
/// Entry point for the pattern-matching line filter.
///
/// Each `--pattern` adds a matcher to `list`; matchers are combined with
/// `Combiner::And` unless `--or` is given, and `--invert` flips the result.
/// Matching lines of every input file are written to the writer returned by
/// `get_writer("-")`, each prefixed with the location columns requested via
/// `--location`. `--show-const` and `--show-func` print their listing and
/// return immediately.
///
/// Errors from argument parsing, file access, column lookup and writing are
/// propagated.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    // The description previously read "Select uniq lines.", which matches
    // the uniq command rather than this pattern filter.
    let prog = args::ProgSpec::new("Select lines matching patterns.", args::FileCount::Many);
    const A: [ArgSpec; 6] = [
        arg! {"pattern", "p", "Col,Spec,Pattern", "Select line where this col matches this pattern."},
        arg! {"show-const", "", "", "Print available constants"},
        arg! {"show-func", "", "", "Print available functions"},
        arg! {"or", "o", "", "A line matches if any of the matchers matches."},
        arg! {"invert", "v", "", "Print lines that don't match."},
        arg! {"location", "l", "name:what", "prefix extra columns of location context."},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut list = LineMatcherList::new_with(Combiner::And);
    let mut reverse = false;
    let mut loc = FileLocList::new();
    for x in args {
        if x.name == "pattern" {
            list.push(&x.value)?;
        } else if x.name == "or" {
            list.multi = Combiner::Or;
        } else if x.name == "invert" {
            reverse = true;
        } else if x.name == "location" {
            loc.push(&x.value)?;
        } else if x.name == "show-const" {
            expr::show_const();
            return Ok(());
        } else if x.name == "show-func" {
            expr::show_func();
            return Ok(());
        } else {
            unreachable!();
        }
    }
    let mut w = get_writer("-")?;
    for x in &files {
        let mut f = Reader::new_open(x)?;
        if f.is_empty() {
            continue;
        }
        list.lookup(&f.names())?;
        let mut not_header = String::new();
        let mut header = ColumnHeader::new();
        // Location columns come first, then the file's own columns.
        loc.add(&mut header)?;
        header.push_all(f.header())?;
        // `not_header` stays empty for files without a header line.
        if f.has_header() {
            not_header = header.get_head(b'\t');
        }
        if settings.checker.check(not_header.as_bytes(), x)? {
            w.write_all(not_header.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }
        loop {
            if list.ok(f.curr_line()) ^ reverse {
                // write previous lines of context if necessary
                loc.write_data(&mut w.0, b'\t', f.loc())?;
                f.write_curr(&mut w.0)?;
            } else {
                // write more lines of context if necessary
            }
            if f.getline()? {
                break;
            }
        }
    }
    Ok(())
}
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
/// Prepare a join as described by `config` and run it.
///
/// Opens every input file, sets up the optional writers for non-matching
/// lines, builds the key comparators, works out the output columns, writes
/// the output header when the first input has one, and finally hands over
/// to `join_quick`.
///
/// # Errors
/// Fails for fewer than two inputs, an out-of-range or repeated file number
/// among the unmatched outputs, an output column that refers to a missing
/// file, an empty output column list, or a join type other than
/// `JoinType::Quick`. I/O and lookup errors are propagated.
fn join(&mut self, config: &JoinConfig) -> Result<()> {
    let num_files = config.infiles.len();
    if num_files < 2 {
        return err!(
            "Join requires at least two input files, {} found",
            num_files
        );
    }
    for name in &config.infiles {
        self.r.push(Reader::new_open(name)?);
    }
    // One (initially absent) unmatched-line writer per input file.
    self.no_match.extend((0..num_files).map(|_| None));
    for extra in &config.unmatch_out {
        // File numbers are 1-based on the command line.
        if !(1..=num_files).contains(&extra.file_num) {
            return err!(
                "Join had {} input files, but requested non matching lines from file {}",
                num_files,
                extra.file_num
            );
        }
        let idx = extra.file_num - 1;
        if self.no_match[idx].is_some() {
            return err!("Multiple uses of --also for file {}", extra.file_num);
        }
        let mut out = get_writer(&extra.file_name)?;
        self.r[idx].write_header(&mut *out)?;
        self.no_match[idx] = Some(out);
    }
    if config.keys.is_empty() {
        // No key given: fall back to the comparator spec "1".
        self.comp.push(CompMaker::make_line_comp("1")?);
    } else {
        for spec in &config.keys {
            self.comp.push(CompMaker::make_line_comp(spec)?);
        }
    }
    for (file, reader) in self.r.iter().enumerate() {
        self.comp.lookup_n(&reader.names(), file)?;
    }
    if config.col_specs.is_empty() {
        // Default output: every column of the first file, then the columns
        // of the other files that the comparator does not use.
        for file in 0..self.r.len() {
            let used = self.comp.used_cols(file);
            let num_cols = self.r[file].names().len();
            for col in 0..num_cols {
                if file == 0 || !used.contains(&col) {
                    self.out_cols.push(OneOutCol::new_plain(file, col));
                }
            }
        }
    } else {
        for spec in &config.col_specs {
            let mut spec = spec.clone();
            if spec.file >= self.r.len() {
                return err!(
                    "{} input files, but file {} referred to as an output column",
                    self.r.len(),
                    spec.file
                );
            }
            spec.cols.lookup(&self.r[spec.file].names())?;
            for col in spec.cols.get_cols() {
                self.out_cols.push(OneOutCol::new(spec.file, col));
            }
        }
    }
    if self.out_cols.is_empty() {
        return err!("No output columns specified");
    }
    if self.r[0].has_header() {
        self.yes_match.write_all(b" CDX")?;
        for col in &self.out_cols {
            self.yes_match.write_all(&[config.out_delim])?;
            col.write_head(&mut *self.yes_match, &self.r)?;
        }
        self.yes_match.0.write_all(&[b'\n'])?;
    }
    if config.jtype != JoinType::Quick {
        return err!("Only quick supported");
    }
    self.join_quick(config)
}
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
/// Entry point for the file-verification command.
///
/// For every input file this checks, as requested on the command line:
/// that each line has as many columns as the header, that each line
/// satisfies the `--pattern` matchers, that the first/last lines satisfy
/// the `--first`/`--last` conditions, and that lines are sorted (and
/// unique) under the `--key` comparators.
///
/// Checking of a file stops once `max_fails` failures have been counted.
/// If any failure has been counted after a file, `CdxError::Silent` is
/// returned; the diagnostics themselves are printed by the checks.
///
/// Errors from argument parsing, file access and column lookup are
/// propagated.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Verify file contents.", args::FileCount::Many);
    const A: [ArgSpec; 10] = [
        arg! {"report", "r", "Number", "How many failures to report before exit."},
        arg! {"first", "f", "Op,Value", "'FirstLine Op Value' must be true. E.g LT,a for first line is less than 'a'."},
        arg! {"last", "l", "Op,Value", "'LastLine Op Value' must be true."},
        arg! {"key", "k", "Spec", "How to compare adjacent lines"},
        arg! {"sort", "s", "", "Check that the file is sorted."},
        arg! {"unique", "u", "", "Check that the file is sorted, with unique lines."},
        arg! {"pattern", "p", "Col,Spec,Pattern", "Select line where this col matches this pattern."},
        arg! {"show-matchers", "", "", "Print available matchers"},
        arg! {"show-const", "", "", "Print available constants"},
        arg! {"show-func", "", "", "Print available functions"},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut list = LineMatcherList::new_with(Combiner::And);
    let mut comp = LineCompList::new();
    let mut do_sort = false;
    let mut do_unique = false;
    let mut max_fails = 5;
    let mut first: Option<CheckLine> = None;
    let mut last: Option<CheckLine> = None;
    // NOTE(review): "show-matchers" is declared in `A` but has no branch
    // here, so it would reach `unreachable!()` unless `args::parse` consumes
    // it — confirm and wire it to the matcher listing.
    for x in args {
        if x.name == "pattern" {
            list.push(&x.value)?;
        } else if x.name == "key" {
            comp.add(&x.value)?;
        } else if x.name == "or" {
            list.multi = Combiner::Or;
        } else if x.name == "report" || x.name == "fail" {
            // The option is declared as "report"; "fail" is still accepted
            // so that the previously tested name keeps working.
            max_fails = x.value.to_usize_whole(x.value.as_bytes(), "max fails")?;
        } else if x.name == "sort" {
            do_sort = true;
        } else if x.name == "first" {
            first = Some(CheckLine::new(&x.value)?);
        } else if x.name == "last" {
            last = Some(CheckLine::new(&x.value)?);
        } else if x.name == "unique" {
            do_sort = true;
            do_unique = true;
        } else if x.name == "show-const" {
            expr::show_const();
            return Ok(());
        } else if x.name == "show-func" {
            expr::show_func();
            return Ok(());
        } else {
            unreachable!();
        }
    }
    if comp.is_empty() {
        comp.add("")?;
    }
    let mut fails = 0;
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        list.lookup(&f.names())?;
        comp.lookup(&f.names())?;
        if f.is_done() {
            continue;
        }
        if let Some(check) = first.as_ref() {
            if !check.line_ok_verbose(f.curr_line(), &mut comp, f.line_number())? {
                fails += 1;
            }
        }
        let num_cols = f.names().len();
        loop {
            let mut did_fail = false;
            if f.curr().len() != num_cols {
                eprintln!(
                    "Expected {num_cols} columns, but line {} of {} had {}",
                    f.line_number() + 1,
                    x,
                    f.curr().len()
                );
                did_fail = true;
            }
            if !list.ok_verbose(f.curr_line(), f.line_number(), x) {
                did_fail = true;
            }
            if f.getline()? {
                // End of input: the line just checked is now the lookback line.
                if let Some(check) = last.as_ref() {
                    if !check.line_ok_verbose(f.prev_line(1), &mut comp, f.line_number() - 1)? {
                        fails += 1;
                    }
                }
                // Count a failure found on the final line before leaving;
                // otherwise it would be reported but not reflected in the
                // result.
                if did_fail {
                    fails += 1;
                }
                break;
            }
            if do_sort {
                did_fail = did_fail || comp_check(&f, &mut comp, do_unique);
            }
            if did_fail {
                fails += 1;
                if fails >= max_fails {
                    break;
                }
            }
        }
        if fails > 0 {
            return cdx_err(CdxError::Silent);
        }
    }
    Ok(())
}
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
/// Entry point for the command that collapses runs of adjacent matching lines.
///
/// Parses `argv`, opens the single input file, and writes one output line per
/// run of adjacent lines that `comp` considers equal. Depending on the
/// options, the output line is the first or last line of the run, a line
/// chosen through `count.assign`, or a line produced by the aggregators in
/// `agg`; a match count may be included via `count`.
///
/// Returns `Ok(())` without writing anything when the input file is empty.
/// Errors from argument parsing, column lookup, reading and writing are
/// propagated.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
let prog = args::ProgSpec::new("Select uniq lines.", args::FileCount::One);
const A: [ArgSpec; 7] = [
arg! {"agg", "a", "Col,Spec", "Merge value from this column, in place."},
arg! {"agg-pre", "", "NewCol,SrcCol,Spec", "Merge value from SrcCol into new column, before other columns."},
arg! {"agg-post", "", "NewCol,SrcCol,Spec", "Merge value from SrcCol into new column, after other columns."},
arg! {"key", "k", "Spec", "How to compare adjacent lines"},
arg! {"count", "c", "ColName,Position", "Write the count of matching line."},
arg! {"which", "w", "(First,Last,Min,Max)[,LineCompare]", "Which of the matching lines should be printed."},
arg! {"agg-help", "", "", "Print help for aggregators"},
];
let (args, files) = args::parse(&prog, &A, argv, settings)?;
let mut agg = LineAggList::new();
let mut comp = LineCompList::new();
let mut count = Count::default();
// NOTE(review): "agg-help" is declared in `A` but has no branch here, so it
// would fall through to `unreachable!()` unless `args::parse` consumes it —
// confirm.
for x in args {
if x.name == "key" {
comp.add(&x.value)?;
} else if x.name == "count" {
count.get_count(&x.value)?;
} else if x.name == "which" {
count.get_which(&x.value)?;
} else if x.name == "agg" {
agg.push_replace(&x.value)?;
} else if x.name == "agg-post" {
agg.push_append(&x.value)?;
} else if x.name == "agg-pre" {
agg.push_prefix(&x.value)?;
} else {
unreachable!();
}
}
assert_eq!(files.len(), 1);
let mut f = Reader::new();
f.open(&files[0])?;
if f.is_empty() {
return Ok(());
}
comp.lookup(&f.names())?;
count.lookup(&f.names())?;
let mut c_write = Writer::new(f.delim());
// With aggregators present, the match count is itself expressed as a
// "count" aggregator placed before or after the other columns.
if !agg.is_empty() {
if count.pos == CountPos::Begin {
agg.push_first_prefix(&format!("{},1,count", count.name))?;
}
if count.pos == CountPos::End {
agg.push_append(&format!("{},1,count", count.name))?;
}
agg.lookup(&f.names())?;
agg.fill(&mut c_write, f.header());
c_write.lookup(&f.names())?;
}
let mut w = get_writer("-")?;
// Emit the output header: either the input header with the count column
// added at the requested position, or the names supplied by `c_write`.
if f.has_header() {
let mut ch = ColumnHeader::new();
if agg.is_empty() {
if count.pos == CountPos::Begin {
ch.push(&count.name)?;
}
ch.push_all(f.header())?;
if count.pos == CountPos::End {
ch.push(&count.name)?;
}
} else {
c_write.add_names(&mut ch, f.header())?;
}
w.write_all(ch.get_head(f.delim()).as_bytes())?;
}
if f.is_done() {
return Ok(());
}
f.do_split(comp.need_split());
// Number of lines in the current run; the first line is already loaded.
let mut matches = 1;
// In every loop below, `getline()` returning true is treated as end of
// input: the pending run is flushed and the loop exits.
if !agg.is_empty() {
// Aggregating mode: `tmp` holds the representative line of the run.
agg.add(f.curr_line());
let mut tmp = f.curr_line().clone();
loop {
if f.getline()? {
c_write.write(&mut w.0, &tmp)?;
break;
}
if comp.equal_cols(f.prev_line(1), f.curr_line()) {
count.assign(&mut tmp, f.curr_line());
agg.add(f.curr_line());
} else {
// Run ended: flush it and start a new run from the current line.
c_write.write(&mut w.0, &tmp)?;
tmp.assign(f.curr_line());
agg.reset();
agg.add(f.curr_line());
}
}
} else if count.which == Which::Last {
// Keep-last mode: the reader's one-line lookback is the line to print.
loop {
if f.getline()? {
count.write(&mut w.0, matches, f.prev_line(1).line(), f.delim())?;
break;
}
if comp.equal_cols(f.prev_line(1), f.curr_line()) {
matches += 1;
} else {
count.write(&mut w.0, matches, f.prev_line(1).line(), f.delim())?;
matches = 1;
}
}
} else if count.which == Which::First && count.is_plain() {
// Keep-first mode with `count.is_plain()`: write each run's first line
// as soon as it is seen.
f.write_curr(&mut w.0)?;
loop {
if f.getline()? {
break;
}
if !comp.equal_cols(f.prev_line(1), f.curr_line()) {
f.write_curr(&mut w.0)?;
}
}
} else {
// General mode: `count.assign` decides whether the current line replaces
// the remembered line `tmp`; the run is written once it ends.
let mut tmp = f.curr_line().clone();
loop {
if f.getline()? {
count.write(&mut w.0, matches, tmp.line(), f.delim())?;
break;
}
if comp.equal_cols(f.prev_line(1), f.curr_line()) {
count.assign(&mut tmp, f.curr_line());
matches += 1;
} else {
count.write(&mut w.0, matches, tmp.line(), f.delim())?;
tmp.assign(f.curr_line());
matches = 1;
}
}
}
Ok(())
}
sourcepub fn write(&self, w: &mut impl Write) -> Result<()>
pub fn write(&self, w: &mut impl Write) -> Result<()>
write the current text line with newline
Examples found in repository
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
/// Merge two already-sorted files into `w`, ordered by `cmp`.
///
/// The header of `left` (if any) is written first. When `unique` is false,
/// lines that compare equal are both written, right before left. When
/// `unique` is true, the main loop writes one line per group of lines that
/// compare equal and skips the following equal lines in both inputs.
///
/// NOTE(review): once one input is exhausted, the remainder of the other is
/// copied verbatim even when `unique` is set — confirm whether duplicates in
/// that tail should be suppressed as well.
///
/// # Errors
/// Propagates errors from opening/reading either file, from column lookup
/// and from writing to `w`.
pub fn merge_2(
    &self,
    left: &str,
    right: &str,
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
) -> Result<()> {
    let mut left_file = Reader::new();
    let mut right_file = Reader::new();
    left_file.open(left)?;
    right_file.open(right)?;
    left_file.do_split(false);
    right_file.do_split(false);
    cmp.lookup(&left_file.names())?;
    // FIXME -- Check Header
    if left_file.has_header() {
        w.write_all(left_file.header().line.as_bytes())?;
    }
    if unique {
        // Raw bytes of the most recently written line.
        let mut prev: Vec<u8> = Vec::new();
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            match ord {
                Ordering::Less => {
                    left_file.write(&mut w)?;
                    mem::swap(&mut prev, left_file.curr_mut().raw());
                    left_file.getline()?;
                }
                Ordering::Greater => {
                    right_file.write(&mut w)?;
                    // The line just written came from the right file, so its
                    // buffer is the one to remember. Swapping with the left
                    // reader here would corrupt the left file's current line.
                    mem::swap(&mut prev, right_file.curr_mut().raw());
                    right_file.getline()?;
                }
                Ordering::Equal => {
                    left_file.write(&mut w)?;
                    mem::swap(&mut prev, left_file.curr_mut().raw());
                    left_file.getline()?;
                    right_file.getline()?;
                }
            }
            // Drop every following line equal to the one just written.
            while !left_file.is_done() && cmp.equal_lines(left_file.curr().line(), &prev) {
                left_file.getline()?;
            }
            while !right_file.is_done() && cmp.equal_lines(right_file.curr().line(), &prev) {
                right_file.getline()?;
            }
        }
    } else {
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            // if Equal, write both lines
            if ord != Ordering::Less {
                right_file.write(&mut w)?;
                right_file.getline()?;
            }
            if ord != Ordering::Greater {
                left_file.write(&mut w)?;
                left_file.getline()?;
            }
        }
    }
    // At most one of these loops runs: copy whatever input is left over.
    while !left_file.is_done() {
        left_file.write(&mut w)?;
        left_file.getline()?;
    }
    while !right_file.is_done() {
        right_file.write(&mut w)?;
        right_file.getline()?;
    }
    Ok(())
}
More examples
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
/// Join the two readers `self.r[0]` (left) and `self.r[1]` (right) by walking them in step.
///
/// Each step compares the current rows with `self.comp.comp_cols_n`. Matching rows
/// produce one line on `self.yes_match`, built from `self.out_cols` with
/// `config.out_delim` between the pieces. Rows that compare `Less`/`Greater` are
/// written to `self.no_match[0]` / `self.no_match[1]` when those sinks are present.
///
/// NOTE(review): `getline()` is used as if it returns `true` once the reader has no
/// more lines, and the walk presumably requires both inputs to be sorted consistently
/// with `self.comp` -- confirm both against `Reader` and the caller.
fn join_quick(&mut self, config: &JoinConfig) -> Result<()> {
// The lock-step walk only happens while both readers have a current line.
if !self.r[0].is_done() && !self.r[1].is_done() {
let mut cmp = self
.comp
.comp_cols_n(self.r[0].curr(), self.r[1].curr(), 0, 1);
'outer: loop {
match cmp {
// Keys match: emit the joined line, then keep advancing the left reader while
// it still matches the same right-hand row.
Ordering::Equal => loop {
self.out_cols[0].write(&mut *self.yes_match, &self.r)?;
for x in &self.out_cols[1..] {
self.yes_match.write_all(&[config.out_delim])?;
x.write(&mut *self.yes_match, &self.r)?;
}
self.yes_match.write_all(&[b'\n'])?;
// Left reader ran out: also step the right reader past the row that was just
// matched, so the drain loop below does not report it as unmatched.
if self.r[0].getline()? {
self.r[1].getline()?;
break 'outer;
}
cmp = self
.comp
.comp_cols_n(self.r[0].curr(), self.r[1].curr(), 0, 1);
// The new left row no longer matches: move the right reader on and go back
// to the outer `match` with a fresh comparison.
if cmp != Ordering::Equal {
if self.r[1].getline()? {
break 'outer;
}
cmp = self
.comp
.comp_cols_n(self.r[0].curr(), self.r[1].curr(), 0, 1);
break;
}
},
// Left row sorts first, so it has no partner: report it and advance left.
Ordering::Less => {
if let Some(x) = &mut self.no_match[0] {
self.r[0].write(&mut x.0)?;
}
// A plain `break` here leaves `'outer`; the `match` itself is not a loop.
if self.r[0].getline()? {
break;
}
cmp = self
.comp
.comp_cols_n(self.r[0].curr(), self.r[1].curr(), 0, 1);
}
// Right row sorts first, so it has no partner: report it and advance right.
Ordering::Greater => {
if let Some(x) = &mut self.no_match[1] {
self.r[1].write(&mut x.0)?;
}
if self.r[1].getline()? {
break;
}
cmp = self
.comp
.comp_cols_n(self.r[0].curr(), self.r[1].curr(), 0, 1);
}
}
}
}
// Whatever is left on either side can no longer match anything.
while !self.r[0].is_done() {
if let Some(x) = &mut self.no_match[0] {
self.r[0].write(&mut x.0)?;
}
self.r[0].getline()?;
}
while !self.r[1].is_done() {
if let Some(x) = &mut self.no_match[1] {
self.r[1].write(&mut x.0)?;
}
self.r[1].getline()?;
}
Ok(())
}
sourcepub fn open(&mut self, name: &str) -> Result<()>
pub fn open(&mut self, name: &str) -> Result<()>
Open a file for reading.
Examples found in repository
594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
/// Load every non-blank line of the file `fname` into `data`.
///
/// Column splitting is turned off, so each line is taken as a whole. The line's
/// trailing `\n` is removed only when it is actually there, so a final line that
/// lacks a newline keeps all of its bytes. Lines that are empty after that are
/// skipped.
///
/// # Errors
/// Propagates any error from opening or reading the file.
fn load_hashset(data: &mut HashSet<Vec<u8>>, fname: &str) -> Result<()> {
    let mut f = Reader::new();
    f.do_split(false);
    f.open(fname)?;
    if f.is_done() {
        return Ok(());
    }
    loop {
        let raw: &[u8] = f.curr().line();
        // Strip the terminator first, then decide whether anything is left.
        let line = raw.strip_suffix(b"\n").unwrap_or(raw);
        if !line.is_empty() {
            data.insert(line.to_vec());
        }
        if f.getline()? {
            break;
        }
    }
    Ok(())
}
/// Matcher whose pattern is a file name: a value matches when it is byte-for-byte
/// equal to one of the lines loaded from that file.
#[derive(Debug, Clone)]
struct FileExactMatch {
    data: HashSet<Vec<u8>>,
    file_name: String,
}

impl FileExactMatch {
    /// Build the matcher by loading the lines of `file_name` into the lookup set.
    fn new(file_name: &str) -> Result<Self> {
        let mut data = HashSet::new();
        load_hashset(&mut data, file_name)?;
        let file_name = file_name.to_string();
        Ok(Self { data, file_name })
    }
}

impl Match for FileExactMatch {
    /// String form of the lookup: the text is compared as raw bytes.
    fn smatch(&self, buff: &str) -> bool {
        let bytes = buff.as_bytes();
        self.data.contains(bytes)
    }

    /// Byte form of the lookup.
    fn umatch(&self, buff: &[u8]) -> bool {
        self.data.contains(buff)
    }

    /// Human-readable description of this matcher.
    fn show(&self) -> String {
        let name = &self.file_name;
        format!("Exact Match of one line in file {}", name)
    }
}
/// Load every non-blank line of the file `fname` into `data`, lower-cased.
///
/// Column splitting is turned off. The trailing `\n` is removed when present, and
/// lines that are empty after that are skipped, so a one-byte final line with no
/// newline is still loaded. With `unicode` set the line must be valid UTF-8 and is
/// lower-cased as text; otherwise it is lower-cased as raw bytes.
///
/// # Errors
/// Propagates file errors, and the UTF-8 conversion error when `unicode` is set
/// and a line is not valid UTF-8.
fn load_hashset_c(data: &mut HashSet<Vec<u8>>, fname: &str, unicode: bool) -> Result<()> {
    let mut f = Reader::new();
    f.do_split(false);
    f.open(fname)?;
    if f.is_done() {
        return Ok(());
    }
    loop {
        let raw: &[u8] = f.curr().line();
        // Strip the terminator first, then decide whether anything is left.
        let line = raw.strip_suffix(b"\n").unwrap_or(raw);
        if !line.is_empty() {
            if unicode {
                // PERF - 2 allocations
                data.insert(String::from_utf8(line.to_vec())?.new_lower().into_bytes());
            } else {
                data.insert(line.new_lower());
            }
        }
        if f.getline()? {
            break;
        }
    }
    Ok(())
}
More examples
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
/// Print `file` to stdout with rows and columns swapped.
///
/// At most `max_lines` data lines are read. Each output line holds one input
/// column: the header name first (when the file has a header), then that column's
/// value from every line read, tab-separated. When `head` is set the output is
/// prefixed with the literal bytes `" CDX\t"`.
///
/// A zero-byte file produces no output. A file with no data lines contributes no
/// data rows, so only the header names (if any) are printed.
///
/// # Errors
/// Propagates errors from opening/reading the file and from writing the output.
pub fn transpose(file: &str, head: bool, max_lines: usize) -> Result<()> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(());
    }
    let mut data = Vec::new();
    // Only touch `curr()` when the reader actually has a current line.
    if !f.is_done() {
        while data.len() < max_lines {
            data.push(f.curr().clone());
            if f.getline()? {
                break;
            }
        }
    }
    let mut w = get_writer("-")?;
    if head {
        w.write_all(b" CDX\t")?;
    }
    for i in 0..f.header().len() {
        // A tab is needed before a value only if something precedes it on the line.
        let mut need_tab = if f.has_header() {
            w.write_all(f.header()[i].as_bytes())?;
            true
        } else {
            false
        };
        for x in &data {
            if need_tab {
                w.write_all(b"\t")?;
            }
            need_tab = true;
            w.write_all(&x[i])?;
        }
        w.write_all(b"\n")?;
    }
    Ok(())
}
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
/// Entry point: evaluate every line of every input file as a formatted expression.
///
/// `--fmt` sets the initial number format. Each line is passed through
/// `parse_fmt_expr`, whose returned format replaces the current one, and the
/// remaining expression is evaluated with `calc`. Results are printed to stdout and
/// flushed one per line; evaluation errors go to stderr and do not stop processing.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Evaluate Formatted Expressions.", args::FileCount::Many);
    const A: [ArgSpec; 1] = [arg! {"fmt", "f", "Format", "How to format values."}];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;

    let mut fmt = NumFormat::default();
    for opt in args {
        if opt.name != "fmt" {
            unreachable!();
        }
        fmt = NumFormat::new(&opt.value)?;
    }

    let mut w = get_writer("-")?;
    for name in &files {
        let mut f = Reader::new();
        f.open(name)?;
        if f.is_empty() || f.is_done() {
            continue;
        }
        let mut at_end = false;
        while !at_end {
            let text = String::from_utf8_lossy(f.curr_nl());
            let (next_fmt, expr) = parse_fmt_expr(fmt, &text);
            fmt = next_fmt;
            match calc(expr) {
                Ok(value) => {
                    fmt.print(value, &mut w.0)?;
                    w.write_all(b"\n")?;
                    w.flush()?;
                }
                Err(e) => eprintln!("{}", e),
            }
            at_end = f.getline()?;
        }
    }
    Ok(())
}
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
/// Print the first screenful of `file` to stdout as an aligned table.
///
/// Up to `screen.height` lines (header included) are read. Every column is as wide
/// as its widest cell; the widest columns are then shrunk with `dec_max` until the
/// table, with one separating space per column, fits in `screen.width`. The header
/// line is centred in its columns, data lines are left-aligned and space-padded.
///
/// A zero-byte file prints nothing. A screen narrower than the column count is
/// treated as having no room for cell text rather than underflowing, and a line
/// with more cells than the header simply adds columns.
///
/// # Errors
/// Propagates errors from opening/reading the file and from writing the output.
pub fn show(file: &str, screen: &Rect) -> Result<()> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(());
    }
    let mut lines: Vec<StringLine> = Vec::new();
    let mut sizes: Vec<usize> = Vec::new();
    sizes.resize(f.header().len(), 0);
    if f.has_header() {
        lines.push(f.header().clone());
    }
    if !f.is_done() {
        while lines.len() < screen.height {
            let mut s = StringLine::new();
            s.line = String::from_utf8_lossy(f.curr().line()).to_string();
            s.split(f.delim());
            lines.push(s);
            if f.getline()? {
                break;
            }
        }
    }
    // Measure: each column is as wide as its widest cell.
    for x in &lines {
        for (i, c) in x.iter().enumerate() {
            let width = UnicodeWidthStr::width(c);
            // A ragged line may have more cells than the header declared.
            if i >= sizes.len() {
                sizes.resize(i + 1, 0);
            }
            if sizes[i] < width {
                sizes[i] = width;
            }
        }
    }
    let mut total: usize = sizes.iter().sum();
    // One cell per column is reserved for the separator; never go below zero.
    let target = screen.width.saturating_sub(sizes.len());
    while total > target {
        dec_max(&mut sizes);
        total = sizes.iter().sum();
    }
    let mut w = get_writer("-")?;
    // Only the first line can be the header, so centring is switched off after it.
    let mut do_center = f.has_header();
    for x in &lines {
        let mut need_space = false;
        for (c, y) in x.iter().enumerate() {
            let (nstr, width) = y.unicode_truncate(sizes[c]);
            if need_space {
                w.write_all(b" ")?;
            }
            let num = (sizes[c] - width) / 2;
            if do_center {
                for _ in 0..num {
                    w.write_all(b" ")?;
                }
            }
            w.write_all(nstr.as_bytes())?;
            if do_center {
                // The right side takes whatever padding the left side did not.
                let num2 = (sizes[c] - width) - num;
                for _ in 0..num2 {
                    w.write_all(b" ")?;
                }
            } else {
                for _ in width..sizes[c] {
                    w.write_all(b" ")?;
                }
            }
            need_space = true;
        }
        do_center = false;
        w.write_all(b"\n")?;
    }
    Ok(())
}
/// Render the first screenful of `file` as an aligned table into `w`.
///
/// `w` is cleared and then receives one string per line, without a trailing
/// newline. Up to `screen.height` lines (header included) are read. Every column is
/// as wide as its widest cell; the widest columns are then shrunk with `dec_max`
/// until the table, with one separating space per column, fits in `screen.width`.
/// The header line is centred in its columns, data lines are left-aligned.
///
/// Returns the sum of the final column widths plus the number of columns, or `0`
/// for a zero-byte file (in which case `w` is left untouched).
///
/// A screen narrower than the column count is treated as having no room for cell
/// text rather than underflowing, and a line with more cells than the header
/// simply adds columns.
///
/// # Errors
/// Propagates errors from opening or reading the file.
pub fn show2(file: &str, screen: &Rect, w: &mut Vec<String>) -> Result<usize> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(0);
    }
    let mut lines: Vec<StringLine> = Vec::new();
    let mut sizes: Vec<usize> = Vec::new();
    sizes.resize(f.header().len(), 0);
    if f.has_header() {
        lines.push(f.header().clone());
    }
    if !f.is_done() {
        while lines.len() < screen.height {
            let mut s = StringLine::new();
            s.line = String::from_utf8_lossy(f.curr().line()).to_string();
            s.split(f.delim());
            lines.push(s);
            if f.getline()? {
                break;
            }
        }
    }
    // Measure: each column is as wide as its widest cell.
    for x in &lines {
        for (i, c) in x.iter().enumerate() {
            let width = UnicodeWidthStr::width(c);
            // A ragged line may have more cells than the header declared.
            if i >= sizes.len() {
                sizes.resize(i + 1, 0);
            }
            if sizes[i] < width {
                sizes[i] = width;
            }
        }
    }
    let mut total: usize = sizes.iter().sum();
    // One cell per column is reserved for the separator; never go below zero.
    let target = screen.width.saturating_sub(sizes.len());
    while total > target {
        dec_max(&mut sizes);
        total = sizes.iter().sum();
    }
    // Only the first line can be the header, so centring is switched off after it.
    let mut do_center = f.has_header();
    w.clear();
    for x in &lines {
        let mut s = String::new();
        let mut need_space = false;
        for (c, y) in x.iter().enumerate() {
            let (nstr, width) = y.unicode_truncate(sizes[c]);
            if need_space {
                s.push(' ');
            }
            let num = (sizes[c] - width) / 2;
            if do_center {
                for _ in 0..num {
                    s.push(' ');
                }
            }
            s.push_str(nstr);
            if do_center {
                // The right side takes whatever padding the left side did not.
                let num2 = (sizes[c] - width) - num;
                for _ in 0..num2 {
                    s.push(' ');
                }
            } else {
                for _ in width..sizes[c] {
                    s.push(' ');
                }
            }
            need_space = true;
        }
        do_center = false;
        w.push(s);
    }
    Ok(sizes.iter().sum::<usize>() + sizes.len())
}
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
/// Entry point: select columns from each input file and write them to stdout.
///
/// `--fields`, `--group`, `--expr` and `--composite` each append one output column
/// source, in command-line order; `--dups` sets how duplicate column names are
/// handled. At least one column source is required.
///
/// For every non-empty file the output header is rebuilt from the selected columns.
/// It is written only when `settings.checker.check` says so, and it is the empty
/// string for files without a header.
///
/// # Errors
/// Fails on bad option values, when no column source was given, and on any
/// read/write error.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Select columns", args::FileCount::Many);
    const A: [ArgSpec; 5] = [
        arg! {"fields", "f", "Columns", "the columns to select."},
        arg! {"group", "g", "Columns", "the columns in a bunch, e.g. '.group:1-3'"},
        arg! {"expr", "e", "Name:Expr", "The result of an arithmetic expression"},
        arg! {"composite", "c", "Spec", "new value made from parts. e.g. 'stuff:abc^{two}def'"},
        arg_enum! {"dups", "D", "Mode", "Duplicate Column Handling", &["Fail", "Allow", "Numeric"]},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut header = ColumnHeader::new();
    let mut v = Writer::new(b'\t');
    for x in args {
        if x.name == "dups" {
            header.set_handling(DupColHandling::new(&x.value)?);
        } else if x.name == "fields" {
            v.push(Box::new(ReaderColumns::new(ColumnSet::from_spec(
                &x.value,
            )?)));
        } else if x.name == "group" {
            v.push(Box::new(ColumnClump::from_spec(&x.value)?));
        } else if x.name == "expr" {
            v.push(Box::new(ColumnExpr::new(&x.value)?));
        } else if x.name == "composite" {
            v.push(Box::new(CompositeColumn::new(&x.value)?));
        } else {
            unreachable!();
        }
    }
    if v.is_empty() {
        // Name the options that really exist, so the user can act on the message.
        bail!("cut requires at least one --fields, --group, --expr or --composite");
    }
    let mut w = get_writer("-")?;
    let mut not_header = String::new();
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        v.lookup(&f.names())?;
        // Header state is per file: start from scratch each time.
        header.clear();
        not_header.clear();
        v.add_names(&mut header, f.header())?;
        if f.has_header() {
            not_header = header.get_head(b'\t');
        }
        if settings.checker.check(not_header.as_bytes(), x)? {
            w.write_all(not_header.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }
        loop {
            v.write(&mut w.0, f.curr())?;
            if f.getline()? {
                break;
            }
        }
    }
    Ok(())
}
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
/// Merge the two files `left` and `right` into `w`, ordered by `cmp`.
///
/// Both inputs are read as whole lines (column splitting is off) and are expected
/// to be sorted already. `cmp` is bound to the column names of `left`, and the
/// header of `left`, if any, is written first.
///
/// With `unique` set, a line is written only if it differs (per
/// `cmp.equal_lines`) from the line written just before it; this holds for the
/// interleaved part and for the remainder of whichever file outlasts the other.
/// Without `unique`, every line of both files is written, and for equal lines the
/// one from `right` comes first.
///
/// # Errors
/// Propagates errors from opening/reading either file, from `cmp.lookup`, and from
/// writing to `w`.
pub fn merge_2(
    &self,
    left: &str,
    right: &str,
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
) -> Result<()> {
    let mut left_file = Reader::new();
    let mut right_file = Reader::new();
    left_file.open(left)?;
    right_file.open(right)?;
    left_file.do_split(false);
    right_file.do_split(false);
    cmp.lookup(&left_file.names())?;
    // FIXME -- Check Header
    if left_file.has_header() {
        w.write_all(left_file.header().line.as_bytes())?;
    }
    // Last line written in unique mode; only meaningful once `have_prev` is set.
    let mut prev: Vec<u8> = Vec::new();
    let mut have_prev = false;
    if unique {
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            if ord == Ordering::Greater {
                right_file.write(&mut w)?;
                // Take the buffer of the line just written, i.e. the right one. The
                // reader's buffer is refilled by the `getline` that follows.
                mem::swap(&mut prev, right_file.curr_mut().raw());
                right_file.getline()?;
            } else {
                left_file.write(&mut w)?;
                mem::swap(&mut prev, left_file.curr_mut().raw());
                left_file.getline()?;
                if ord == Ordering::Equal {
                    // The right line is a duplicate of what was just written.
                    right_file.getline()?;
                }
            }
            have_prev = true;
            // Drop any further copies of the line just written, on both sides.
            while !left_file.is_done() && cmp.equal_lines(left_file.curr().line(), &prev) {
                left_file.getline()?;
            }
            while !right_file.is_done() && cmp.equal_lines(right_file.curr().line(), &prev) {
                right_file.getline()?;
            }
        }
    } else {
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            // if Equal, write both lines
            if ord != Ordering::Less {
                right_file.write(&mut w)?;
                right_file.getline()?;
            }
            if ord != Ordering::Greater {
                left_file.write(&mut w)?;
                left_file.getline()?;
            }
        }
    }
    // At most one file still has lines. In unique mode keep de-duplicating against
    // the last line written; otherwise copy the remainder as it is.
    while !left_file.is_done() {
        let dup = unique && have_prev && cmp.equal_lines(left_file.curr().line(), &prev);
        if !dup {
            left_file.write(&mut w)?;
            if unique {
                mem::swap(&mut prev, left_file.curr_mut().raw());
                have_prev = true;
            }
        }
        left_file.getline()?;
    }
    while !right_file.is_done() {
        let dup = unique && have_prev && cmp.equal_lines(right_file.curr().line(), &prev);
        if !dup {
            right_file.write(&mut w)?;
            if unique {
                mem::swap(&mut prev, right_file.curr_mut().raw());
                have_prev = true;
            }
        }
        right_file.getline()?;
    }
    Ok(())
}
sourcepub const fn header_line(&self) -> &String
pub const fn header_line(&self) -> &String
The full text of the header, without the trailing newline
sourcepub const fn is_empty(&self) -> bool
pub const fn is_empty(&self) -> bool
Was the file zero bytes long?
Examples found in repository
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
/// Print `file` to stdout with rows and columns swapped.
///
/// At most `max_lines` data lines are read. Each output line holds one input
/// column: the header name first (when the file has a header), then that column's
/// value from every line read, tab-separated. When `head` is set the output is
/// prefixed with the literal bytes `" CDX\t"`.
///
/// A zero-byte file produces no output. A file with no data lines contributes no
/// data rows, so only the header names (if any) are printed.
///
/// # Errors
/// Propagates errors from opening/reading the file and from writing the output.
pub fn transpose(file: &str, head: bool, max_lines: usize) -> Result<()> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(());
    }
    let mut data = Vec::new();
    // Only touch `curr()` when the reader actually has a current line.
    if !f.is_done() {
        while data.len() < max_lines {
            data.push(f.curr().clone());
            if f.getline()? {
                break;
            }
        }
    }
    let mut w = get_writer("-")?;
    if head {
        w.write_all(b" CDX\t")?;
    }
    for i in 0..f.header().len() {
        // A tab is needed before a value only if something precedes it on the line.
        let mut need_tab = if f.has_header() {
            w.write_all(f.header()[i].as_bytes())?;
            true
        } else {
            false
        };
        for x in &data {
            if need_tab {
                w.write_all(b"\t")?;
            }
            need_tab = true;
            w.write_all(&x[i])?;
        }
        w.write_all(b"\n")?;
    }
    Ok(())
}
More examples
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
/// Entry point: evaluate every line of every input file as a formatted expression.
///
/// `--fmt` sets the initial number format. Each line is passed through
/// `parse_fmt_expr`, whose returned format replaces the current one, and the
/// remaining expression is evaluated with `calc`. Results are printed to stdout and
/// flushed one per line; evaluation errors go to stderr and do not stop processing.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Evaluate Formatted Expressions.", args::FileCount::Many);
    const A: [ArgSpec; 1] = [arg! {"fmt", "f", "Format", "How to format values."}];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;

    let mut fmt = NumFormat::default();
    for opt in args {
        if opt.name != "fmt" {
            unreachable!();
        }
        fmt = NumFormat::new(&opt.value)?;
    }

    let mut w = get_writer("-")?;
    for name in &files {
        let mut f = Reader::new();
        f.open(name)?;
        if f.is_empty() || f.is_done() {
            continue;
        }
        let mut at_end = false;
        while !at_end {
            let text = String::from_utf8_lossy(f.curr_nl());
            let (next_fmt, expr) = parse_fmt_expr(fmt, &text);
            fmt = next_fmt;
            match calc(expr) {
                Ok(value) => {
                    fmt.print(value, &mut w.0)?;
                    w.write_all(b"\n")?;
                    w.flush()?;
                }
                Err(e) => eprintln!("{}", e),
            }
            at_end = f.getline()?;
        }
    }
    Ok(())
}
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
/// Print the first screenful of `file` to stdout as an aligned table.
///
/// Up to `screen.height` lines (header included) are read. Every column is as wide
/// as its widest cell; the widest columns are then shrunk with `dec_max` until the
/// table, with one separating space per column, fits in `screen.width`. The header
/// line is centred in its columns, data lines are left-aligned and space-padded.
///
/// A zero-byte file prints nothing. A screen narrower than the column count is
/// treated as having no room for cell text rather than underflowing, and a line
/// with more cells than the header simply adds columns.
///
/// # Errors
/// Propagates errors from opening/reading the file and from writing the output.
pub fn show(file: &str, screen: &Rect) -> Result<()> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(());
    }
    let mut lines: Vec<StringLine> = Vec::new();
    let mut sizes: Vec<usize> = Vec::new();
    sizes.resize(f.header().len(), 0);
    if f.has_header() {
        lines.push(f.header().clone());
    }
    if !f.is_done() {
        while lines.len() < screen.height {
            let mut s = StringLine::new();
            s.line = String::from_utf8_lossy(f.curr().line()).to_string();
            s.split(f.delim());
            lines.push(s);
            if f.getline()? {
                break;
            }
        }
    }
    // Measure: each column is as wide as its widest cell.
    for x in &lines {
        for (i, c) in x.iter().enumerate() {
            let width = UnicodeWidthStr::width(c);
            // A ragged line may have more cells than the header declared.
            if i >= sizes.len() {
                sizes.resize(i + 1, 0);
            }
            if sizes[i] < width {
                sizes[i] = width;
            }
        }
    }
    let mut total: usize = sizes.iter().sum();
    // One cell per column is reserved for the separator; never go below zero.
    let target = screen.width.saturating_sub(sizes.len());
    while total > target {
        dec_max(&mut sizes);
        total = sizes.iter().sum();
    }
    let mut w = get_writer("-")?;
    // Only the first line can be the header, so centring is switched off after it.
    let mut do_center = f.has_header();
    for x in &lines {
        let mut need_space = false;
        for (c, y) in x.iter().enumerate() {
            let (nstr, width) = y.unicode_truncate(sizes[c]);
            if need_space {
                w.write_all(b" ")?;
            }
            let num = (sizes[c] - width) / 2;
            if do_center {
                for _ in 0..num {
                    w.write_all(b" ")?;
                }
            }
            w.write_all(nstr.as_bytes())?;
            if do_center {
                // The right side takes whatever padding the left side did not.
                let num2 = (sizes[c] - width) - num;
                for _ in 0..num2 {
                    w.write_all(b" ")?;
                }
            } else {
                for _ in width..sizes[c] {
                    w.write_all(b" ")?;
                }
            }
            need_space = true;
        }
        do_center = false;
        w.write_all(b"\n")?;
    }
    Ok(())
}
/// Render the first screenful of `file` as an aligned table into `w`.
///
/// `w` is cleared and then receives one string per line, without a trailing
/// newline. Up to `screen.height` lines (header included) are read. Every column is
/// as wide as its widest cell; the widest columns are then shrunk with `dec_max`
/// until the table, with one separating space per column, fits in `screen.width`.
/// The header line is centred in its columns, data lines are left-aligned.
///
/// Returns the sum of the final column widths plus the number of columns, or `0`
/// for a zero-byte file (in which case `w` is left untouched).
///
/// A screen narrower than the column count is treated as having no room for cell
/// text rather than underflowing, and a line with more cells than the header
/// simply adds columns.
///
/// # Errors
/// Propagates errors from opening or reading the file.
pub fn show2(file: &str, screen: &Rect, w: &mut Vec<String>) -> Result<usize> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(0);
    }
    let mut lines: Vec<StringLine> = Vec::new();
    let mut sizes: Vec<usize> = Vec::new();
    sizes.resize(f.header().len(), 0);
    if f.has_header() {
        lines.push(f.header().clone());
    }
    if !f.is_done() {
        while lines.len() < screen.height {
            let mut s = StringLine::new();
            s.line = String::from_utf8_lossy(f.curr().line()).to_string();
            s.split(f.delim());
            lines.push(s);
            if f.getline()? {
                break;
            }
        }
    }
    // Measure: each column is as wide as its widest cell.
    for x in &lines {
        for (i, c) in x.iter().enumerate() {
            let width = UnicodeWidthStr::width(c);
            // A ragged line may have more cells than the header declared.
            if i >= sizes.len() {
                sizes.resize(i + 1, 0);
            }
            if sizes[i] < width {
                sizes[i] = width;
            }
        }
    }
    let mut total: usize = sizes.iter().sum();
    // One cell per column is reserved for the separator; never go below zero.
    let target = screen.width.saturating_sub(sizes.len());
    while total > target {
        dec_max(&mut sizes);
        total = sizes.iter().sum();
    }
    // Only the first line can be the header, so centring is switched off after it.
    let mut do_center = f.has_header();
    w.clear();
    for x in &lines {
        let mut s = String::new();
        let mut need_space = false;
        for (c, y) in x.iter().enumerate() {
            let (nstr, width) = y.unicode_truncate(sizes[c]);
            if need_space {
                s.push(' ');
            }
            let num = (sizes[c] - width) / 2;
            if do_center {
                for _ in 0..num {
                    s.push(' ');
                }
            }
            s.push_str(nstr);
            if do_center {
                // The right side takes whatever padding the left side did not.
                let num2 = (sizes[c] - width) - num;
                for _ in 0..num2 {
                    s.push(' ');
                }
            } else {
                for _ in width..sizes[c] {
                    s.push(' ');
                }
            }
            need_space = true;
        }
        do_center = false;
        w.push(s);
    }
    Ok(sizes.iter().sum::<usize>() + sizes.len())
}
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
/// Entry point: select columns from each input file and write them to stdout.
///
/// `--fields`, `--group`, `--expr` and `--composite` each append one output column
/// source, in command-line order; `--dups` sets how duplicate column names are
/// handled. At least one column source is required.
///
/// For every non-empty file the output header is rebuilt from the selected columns.
/// It is written only when `settings.checker.check` says so, and it is the empty
/// string for files without a header.
///
/// # Errors
/// Fails on bad option values, when no column source was given, and on any
/// read/write error.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Select columns", args::FileCount::Many);
    const A: [ArgSpec; 5] = [
        arg! {"fields", "f", "Columns", "the columns to select."},
        arg! {"group", "g", "Columns", "the columns in a bunch, e.g. '.group:1-3'"},
        arg! {"expr", "e", "Name:Expr", "The result of an arithmetic expression"},
        arg! {"composite", "c", "Spec", "new value made from parts. e.g. 'stuff:abc^{two}def'"},
        arg_enum! {"dups", "D", "Mode", "Duplicate Column Handling", &["Fail", "Allow", "Numeric"]},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut header = ColumnHeader::new();
    let mut v = Writer::new(b'\t');
    for x in args {
        if x.name == "dups" {
            header.set_handling(DupColHandling::new(&x.value)?);
        } else if x.name == "fields" {
            v.push(Box::new(ReaderColumns::new(ColumnSet::from_spec(
                &x.value,
            )?)));
        } else if x.name == "group" {
            v.push(Box::new(ColumnClump::from_spec(&x.value)?));
        } else if x.name == "expr" {
            v.push(Box::new(ColumnExpr::new(&x.value)?));
        } else if x.name == "composite" {
            v.push(Box::new(CompositeColumn::new(&x.value)?));
        } else {
            unreachable!();
        }
    }
    if v.is_empty() {
        // Name the options that really exist, so the user can act on the message.
        bail!("cut requires at least one --fields, --group, --expr or --composite");
    }
    let mut w = get_writer("-")?;
    let mut not_header = String::new();
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        v.lookup(&f.names())?;
        // Header state is per file: start from scratch each time.
        header.clear();
        not_header.clear();
        v.add_names(&mut header, f.header())?;
        if f.has_header() {
            not_header = header.get_head(b'\t');
        }
        if settings.checker.check(not_header.as_bytes(), x)? {
            w.write_all(not_header.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }
        loop {
            v.write(&mut w.0, f.curr())?;
            if f.getline()? {
                break;
            }
        }
    }
    Ok(())
}
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
/// Entry point: copy to stdout the lines of each input file accepted by the matchers.
///
/// `--pattern` adds a matcher, `--or` combines matchers with "any" instead of
/// "all", `--invert` selects the lines that do not match, and `--location` adds
/// location columns in front of every output line. `--show-const` and
/// `--show-func` print their listing and return immediately.
///
/// The output header is the location columns followed by the file's own header; it
/// is written only when `settings.checker.check` says so.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Select uniq lines.", args::FileCount::Many);
    const A: [ArgSpec; 6] = [
        arg! {"pattern", "p", "Col,Spec,Pattern", "Select line where this col matches this pattern."},
        arg! {"show-const", "", "", "Print available constants"},
        arg! {"show-func", "", "", "Print available functions"},
        arg! {"or", "o", "", "A line matches if any of the matchers matches."},
        arg! {"invert", "v", "", "Print lines that don't match."},
        arg! {"location", "l", "name:what", "prefix extra columns of location context."},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;

    let mut list = LineMatcherList::new_with(Combiner::And);
    let mut loc = FileLocList::new();
    let mut reverse = false;
    for opt in args {
        if opt.name == "pattern" {
            list.push(&opt.value)?;
        } else if opt.name == "or" {
            list.multi = Combiner::Or;
        } else if opt.name == "invert" {
            reverse = true;
        } else if opt.name == "location" {
            loc.push(&opt.value)?;
        } else if opt.name == "show-const" {
            expr::show_const();
            return Ok(());
        } else if opt.name == "show-func" {
            expr::show_func();
            return Ok(());
        } else {
            unreachable!();
        }
    }

    let mut w = get_writer("-")?;
    for name in &files {
        let mut f = Reader::new_open(name)?;
        if f.is_empty() {
            continue;
        }
        list.lookup(&f.names())?;

        let mut header = ColumnHeader::new();
        loc.add(&mut header)?;
        header.push_all(f.header())?;
        let not_header = if f.has_header() {
            header.get_head(b'\t')
        } else {
            String::new()
        };
        if settings.checker.check(not_header.as_bytes(), name)? {
            w.write_all(not_header.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }

        let mut at_end = false;
        while !at_end {
            // A line is selected when its match result differs from `reverse`.
            if list.ok(f.curr_line()) != reverse {
                // write previous lines of context if necessary
                loc.write_data(&mut w.0, b'\t', f.loc())?;
                f.write_curr(&mut w.0)?;
            }
            // (non-selected lines: write more lines of context if necessary)
            at_end = f.getline()?;
        }
    }
    Ok(())
}
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
/// Entry point: write a sample of the lines of each input file to stdout.
///
/// Exactly one selection mode is used:
/// * `--sample N`: feed every line to a `Smooth` sampler of size `N`;
/// * `--for by,from,to`: the lines numbered `from`, `from + by`, ... up to `to`;
/// * `--range`: the lines whose number is in one of the comma-separated ranges.
///
/// With none of them given, `--sample` is assumed with the screen height as `N`.
/// `--header` sets the header-checking mode; a file's header is written when
/// `checker.check_file` says so.
///
/// Zero-byte files and files with no data lines are skipped; the remaining files
/// are still processed.
///
/// # Errors
/// Fails on bad option values, when more than one selection mode is given, and on
/// any read/write error.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Sample lines from files.", args::FileCount::Many);
    const A: [ArgSpec; 4] = [
        arg_enum! {"header", "h", "Mode", "header requirements", &HEADER_MODE},
        arg! {"for", "f", "by,from,to", "for i=from; i<=to; i+= by"},
        arg! {"sample", "s", "Number", "Select this number of lines, more or less evenly spaced."},
        arg! {"range", "r", "Ranges", "e.g. 1-5,42,95-106."},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut checker = HeaderChecker::new();
    let mut floop = For::default();
    let mut ranges = Ranges::default();
    let mut sample = 10;
    let mut saw_sample = false;
    let mut saw_for = false;
    let mut saw_range = false;
    for x in args {
        if x.name == "header" {
            checker.mode = HeaderMode::from_str(&x.value)?;
        } else if x.name == "for" {
            if saw_for || saw_sample || saw_range {
                return err!("No more than one --sample, --range or --for allowed");
            }
            floop = For::new(&x.value)?;
            saw_for = true;
        } else if x.name == "range" {
            if saw_for || saw_sample || saw_range {
                return err!("No more than one --sample, --range or --for allowed");
            }
            saw_range = true;
            for part in x.value.split(',') {
                ranges.push(part)?;
            }
        } else if x.name == "sample" {
            if saw_for || saw_sample || saw_range {
                return err!("No more than one --sample, --range or --for allowed");
            }
            sample = x.value.to_usize_whole(x.value.as_bytes(), "sample size")?;
            saw_sample = true;
        } else {
            unreachable!();
        }
    }
    // No mode chosen: sample one screenful.
    if !saw_for && !saw_sample && !saw_range {
        saw_sample = true;
        sample = Rect::from_screen().height;
    }
    let mut w = get_writer("-")?;
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        if checker.check_file(&f, x)? {
            w.write_all(f.header().line.as_bytes())?;
        }
        if f.is_done() {
            // Nothing to sample in this file; later files still need processing.
            continue;
        }
        f.do_split(false);
        if saw_sample {
            let mut s = Smooth::new(sample);
            loop {
                s.add(f.curr().line());
                if f.getline()? {
                    break;
                }
            }
            s.finalize(&mut w.0)?;
        } else if saw_for {
            let mut next = floop.from;
            while f.line_number() <= floop.to {
                if f.line_number() == next {
                    w.write_all(f.curr_line().line())?;
                    next += floop.by;
                }
                if f.getline()? {
                    break;
                }
            }
        } else {
            // Stop reading once no range can match any more.
            let max = ranges.max();
            while f.line_number() <= max {
                if ranges.contains(f.line_number()) {
                    w.write_all(f.curr_line().line())?;
                }
                if f.getline()? {
                    break;
                }
            }
        }
    }
    Ok(())
}
sourcepub const fn is_done(&self) -> bool
pub const fn is_done(&self) -> bool
Have we reached end of file (EOF)?
Examples found in repository
594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
/// Load every non-blank line of the file `fname` into `data`.
///
/// Column splitting is turned off, so each line is taken as a whole. The line's
/// trailing `\n` is removed only when it is actually there, so a final line that
/// lacks a newline keeps all of its bytes. Lines that are empty after that are
/// skipped.
///
/// # Errors
/// Propagates any error from opening or reading the file.
fn load_hashset(data: &mut HashSet<Vec<u8>>, fname: &str) -> Result<()> {
    let mut f = Reader::new();
    f.do_split(false);
    f.open(fname)?;
    if f.is_done() {
        return Ok(());
    }
    loop {
        let raw: &[u8] = f.curr().line();
        // Strip the terminator first, then decide whether anything is left.
        let line = raw.strip_suffix(b"\n").unwrap_or(raw);
        if !line.is_empty() {
            data.insert(line.to_vec());
        }
        if f.getline()? {
            break;
        }
    }
    Ok(())
}
/// Matcher whose pattern is a file name: a value matches when it is byte-for-byte
/// equal to one of the lines loaded from that file.
#[derive(Debug, Clone)]
struct FileExactMatch {
    data: HashSet<Vec<u8>>,
    file_name: String,
}

impl FileExactMatch {
    /// Build the matcher by loading the lines of `file_name` into the lookup set.
    fn new(file_name: &str) -> Result<Self> {
        let mut data = HashSet::new();
        load_hashset(&mut data, file_name)?;
        let file_name = file_name.to_string();
        Ok(Self { data, file_name })
    }
}

impl Match for FileExactMatch {
    /// String form of the lookup: the text is compared as raw bytes.
    fn smatch(&self, buff: &str) -> bool {
        let bytes = buff.as_bytes();
        self.data.contains(bytes)
    }

    /// Byte form of the lookup.
    fn umatch(&self, buff: &[u8]) -> bool {
        self.data.contains(buff)
    }

    /// Human-readable description of this matcher.
    fn show(&self) -> String {
        let name = &self.file_name;
        format!("Exact Match of one line in file {}", name)
    }
}
/// Load every non-blank line of the file `fname` into `data`, lower-cased.
///
/// Column splitting is turned off. The trailing `\n` is removed when present, and
/// lines that are empty after that are skipped, so a one-byte final line with no
/// newline is still loaded. With `unicode` set the line must be valid UTF-8 and is
/// lower-cased as text; otherwise it is lower-cased as raw bytes.
///
/// # Errors
/// Propagates file errors, and the UTF-8 conversion error when `unicode` is set
/// and a line is not valid UTF-8.
fn load_hashset_c(data: &mut HashSet<Vec<u8>>, fname: &str, unicode: bool) -> Result<()> {
    let mut f = Reader::new();
    f.do_split(false);
    f.open(fname)?;
    if f.is_done() {
        return Ok(());
    }
    loop {
        let raw: &[u8] = f.curr().line();
        // Strip the terminator first, then decide whether anything is left.
        let line = raw.strip_suffix(b"\n").unwrap_or(raw);
        if !line.is_empty() {
            if unicode {
                // PERF - 2 allocations
                data.insert(String::from_utf8(line.to_vec())?.new_lower().into_bytes());
            } else {
                data.insert(line.new_lower());
            }
        }
        if f.getline()? {
            break;
        }
    }
    Ok(())
}
More examples
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
/// Entry point: evaluate every line of every input file as a formatted expression.
///
/// `--fmt` sets the initial number format. Each line is passed through
/// `parse_fmt_expr`, whose returned format replaces the current one, and the
/// remaining expression is evaluated with `calc`. Results are printed to stdout and
/// flushed one per line; evaluation errors go to stderr and do not stop processing.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Evaluate Formatted Expressions.", args::FileCount::Many);
    const A: [ArgSpec; 1] = [arg! {"fmt", "f", "Format", "How to format values."}];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;

    let mut fmt = NumFormat::default();
    for opt in args {
        if opt.name != "fmt" {
            unreachable!();
        }
        fmt = NumFormat::new(&opt.value)?;
    }

    let mut w = get_writer("-")?;
    for name in &files {
        let mut f = Reader::new();
        f.open(name)?;
        if f.is_empty() || f.is_done() {
            continue;
        }
        let mut at_end = false;
        while !at_end {
            let text = String::from_utf8_lossy(f.curr_nl());
            let (next_fmt, expr) = parse_fmt_expr(fmt, &text);
            fmt = next_fmt;
            match calc(expr) {
                Ok(value) => {
                    fmt.print(value, &mut w.0)?;
                    w.write_all(b"\n")?;
                    w.flush()?;
                }
                Err(e) => eprintln!("{}", e),
            }
            at_end = f.getline()?;
        }
    }
    Ok(())
}
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 
570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607
/// Merge all of `in_files` into `w`, using a binary heap of reader indexes.
///
/// The heap stores indexes into `MergeContext::open` and orders them by calling
/// `MergeContext::compare` through a shared `Rc<RefCell<..>>`. The header of the
/// first file, if it has one, is written first. With `unique` set, a popped line is
/// written only when `MergeContext::equal` reports it differs from the previously
/// written one.
///
/// An empty `in_files` writes nothing; a single file without `unique` is copied
/// through unchanged. `_tmp` is not used by this implementation.
///
/// NOTE(review): `getline()` is used as if it returns `false` while the reader
/// still has a current line -- confirm against `Reader::getline`.
pub fn merge_t2(
&self,
in_files: &[String],
cmp: &mut LineCompList,
mut w: impl Write,
unique: bool,
_tmp: &TempDir,
) -> Result<()> {
if in_files.is_empty() {
return Ok(());
}
// Nothing to merge or de-duplicate: plain byte copy of the only input.
if in_files.len() == 1 && !unique {
let r = get_reader(&in_files[0])?;
return copy(r.0, w);
}
let mc = Rc::new(RefCell::new(MergeContext{open : Vec::with_capacity(in_files.len()), cmp}));
let mut heap = BinaryHeap::new_by(|a: &usize, b: &usize| mc.borrow_mut().compare(*a, *b));
// Scoped so that this mutable borrow of `mc` ends before the heap is used; the
// heap's comparator takes its own `borrow_mut()` on every comparison.
{
let mut mcm = mc.borrow_mut();
for x in in_files {
mcm.open.push(Reader::new_open(x)?);
}
// Column splitting is switched off when the comparator says it is not needed.
if !mcm.cmp.need_split() {
for x in &mut mcm.open {
x.do_split(false);
}
}
// FIXME -- Check Header
if mcm.open[0].has_header() {
w.write_all(mcm.open[0].header().line.as_bytes())?;
}
}
// Seed the heap with every reader that has a current line.
for i in 0..in_files.len() {
if !mc.borrow().open[i].is_done() {
heap.push(i)
}
}
if unique {
if heap.is_empty() {
return Ok(());
}
// The first line is always written; it becomes the first `prev`.
let first = heap.pop().unwrap();
let mut prev = mc.borrow().open[first].curr_line().clone();
if !mc.borrow_mut().open[first].getline()? {
heap.push(first);
}
w.write_all(prev.line())?;
while !heap.is_empty() {
if let Some(x) = heap.pop() {
let eq = mc.borrow_mut().equal(&prev, x);
if !eq {
// This shared borrow ends with the `if` block, before `getline` and
// `heap.push` need mutable access again.
let mcm = mc.borrow();
w.write_all(mcm.open[x].curr_line().line())?;
prev.assign(mcm.open[x].curr_line());
}
// Re-queue the reader only while it still has a line to offer.
if !mc.borrow_mut().open[x].getline()? {
heap.push(x);
}
}
}
} else {
while !heap.is_empty() {
if let Some(x) = heap.pop() {
w.write_all(mc.borrow_mut().open[x].curr_line().line())?;
if !mc.borrow_mut().open[x].getline()? {
heap.push(x);
}
}
}
}
Ok(())
}
/// Merge all of `in_files` into `w`, pulling lines from a `MergeTreeItem` tree.
///
/// The header of the first file, if it has one, is written first. With `unique`
/// set, a line is written only when `cmp.equal_cols` reports it differs from the
/// line delivered just before it. An empty `in_files` writes nothing; a single file
/// without `unique` is copied through unchanged. `_tmp` is not used here.
pub fn merge_t1 (
    &self,
    in_files: &[String],
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
    _tmp: &TempDir,
) -> Result<()> {
    if in_files.is_empty() {
        return Ok(());
    }
    if !unique && in_files.len() == 1 {
        let r = get_reader(&in_files[0])?;
        return copy(r.0, w);
    }

    let mut open_files: Vec<Reader> = Vec::with_capacity(in_files.len());
    for name in in_files {
        open_files.push(Reader::new_open(name)?);
    }
    if !cmp.need_split() {
        for reader in &mut open_files {
            reader.do_split(false);
        }
    }
    // FIXME -- Check Header
    if open_files[0].has_header() {
        w.write_all(open_files[0].header().line.as_bytes())?;
    }

    let nums: Vec<usize> = (0..open_files.len()).collect();
    let mut mm = MergeTreeItem::new_tree(&open_files, &nums);

    if !unique {
        // Every line the tree delivers is written as it comes.
        while let Some(idx) = mm.next(cmp, &mut open_files)? {
            w.write_all(open_files[idx].curr_line().line())?;
        }
        return Ok(());
    }

    // Unique mode: the first line is always written and becomes `prev`.
    let first = match mm.next(cmp, &mut open_files)? {
        Some(idx) => idx,
        None => return Ok(()),
    };
    w.write_all(open_files[first].curr_line().line())?;
    let mut prev = open_files[first].curr_line().clone();
    while let Some(idx) = mm.next(cmp, &mut open_files)? {
        let differs = !cmp.equal_cols(&prev, open_files[idx].curr_line());
        if differs {
            w.write_all(open_files[idx].curr_line().line())?;
        }
        prev.assign(open_files[idx].curr_line());
    }
    Ok(())
}
/// Merge all `files` into `w`, choosing the implementation from `self.alt_merge`.
///
/// A temporary directory is created for the duration of the call and passed
/// to `merge_t1` (when `alt_merge` is set) or `merge_t2` (otherwise).
pub fn merge(&self, files: &[String], cmp: &mut LineCompList, w: impl Write, unique: bool) -> Result<()> {
    let scratch = TempDir::new("merge")?;
    if !self.alt_merge {
        return self.merge_t2(files, cmp, w, unique, &scratch);
    }
    self.merge_t1(files, cmp, w, unique, &scratch)
}
/// Given two file names, merge them into output.
///
/// Both files are opened with column splitting disabled and compared as whole
/// lines through `cmp`. The header of `left` (if it has one) is written first.
///
/// * `unique == false`: every line of both inputs is written; when two lines
///   compare equal the right one is written before the left one.
/// * `unique == true`: each run of lines that compare equal is written once.
///
/// Fixes relative to the previous version:
/// * when the right line was the smaller one, `prev` was swapped with the
///   *left* file's buffer, so later duplicate checks compared against the
///   wrong line and the left file's current line was clobbered;
/// * in unique mode the lines left over once one input ran out were copied
///   without removing duplicates among themselves.
pub fn merge_2(
    &self,
    left: &str,
    right: &str,
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
) -> Result<()> {
    let mut left_file = Reader::new();
    let mut right_file = Reader::new();
    left_file.open(left)?;
    right_file.open(right)?;
    left_file.do_split(false);
    right_file.do_split(false);
    cmp.lookup(&left_file.names())?;
    // FIXME -- Check Header
    if left_file.has_header() {
        w.write_all(left_file.header().line.as_bytes())?;
    }
    if unique {
        // Bytes of the most recently written line. They are obtained by
        // swapping buffers with the reader, which then refills its own buffer
        // on the following getline().
        let mut prev: Vec<u8> = Vec::new();
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            if ord == Ordering::Greater {
                right_file.write(&mut w)?;
                mem::swap(&mut prev, right_file.curr_mut().raw());
                right_file.getline()?;
            } else {
                left_file.write(&mut w)?;
                mem::swap(&mut prev, left_file.curr_mut().raw());
                left_file.getline()?;
                if ord == Ordering::Equal {
                    right_file.getline()?;
                }
            }
            // Drop every following line, on either side, equal to the one just written.
            while !left_file.is_done() && cmp.equal_lines(left_file.curr().line(), &prev) {
                left_file.getline()?;
            }
            while !right_file.is_done() && cmp.equal_lines(right_file.curr().line(), &prev) {
                right_file.getline()?;
            }
        }
        // At most one input still has lines. Its current line is already known
        // to differ from `prev`, so write it and then skip its duplicates.
        while !left_file.is_done() {
            left_file.write(&mut w)?;
            mem::swap(&mut prev, left_file.curr_mut().raw());
            left_file.getline()?;
            while !left_file.is_done() && cmp.equal_lines(left_file.curr().line(), &prev) {
                left_file.getline()?;
            }
        }
        while !right_file.is_done() {
            right_file.write(&mut w)?;
            mem::swap(&mut prev, right_file.curr_mut().raw());
            right_file.getline()?;
            while !right_file.is_done() && cmp.equal_lines(right_file.curr().line(), &prev) {
                right_file.getline()?;
            }
        }
    } else {
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            // if Equal, write both lines
            if ord != Ordering::Less {
                right_file.write(&mut w)?;
                right_file.getline()?;
            }
            if ord != Ordering::Greater {
                left_file.write(&mut w)?;
                left_file.getline()?;
            }
        }
        // Copy whatever remains of the input that has not run out.
        while !left_file.is_done() {
            left_file.write(&mut w)?;
            left_file.getline()?;
        }
        while !right_file.is_done() {
            right_file.write(&mut w)?;
            right_file.getline()?;
        }
    }
    Ok(())
}
/// Sort the lines of all `files` together and write the result to `w`.
///
/// A `Sorter` is created from `cmp` with a fixed 500,000,000 byte budget,
/// each file is fed in with `add_file`, and `finalize` emits the output.
pub fn sort<W: Write>(&self, files: &[String], cmp: LineCompList, w: &mut W, unique: bool) -> Result<()> // maybe return some useful stats?
{
    let mut sorter = Sorter::new(cmp, 500_000_000, unique);
    files
        .iter()
        .try_for_each(|name| sorter.add_file(name, w))?;
    sorter.finalize(w)?;
    // sorter.no_del();
    Ok(())
}
}
/// Large block of text and pointers to lines therein
///
/// Input bytes accumulate in `data`; `ptrs` receives one `Item` per complete
/// line found there. When the buffer fills up, the sorted lines are written
/// to a file under `tmp`, and those files are merged when sorting finishes.
#[allow(missing_debug_implementations)]
pub struct Sorter {
// sort options; consulted for `alt_sort` and used to run the final merge
config : SortConfig,
// one entry per complete line in `data`, rebuilt from scratch by `calc`
ptrs: Vec<Item>,
// comparator used for sorting, de-duplication and merging
cmp: LineCompList,
// directory that holds the spilled temp files
tmp: TempDir,
// names of the temp files written so far, in creation order
tmp_files: Vec<String>,
// when set, lines that compare equal are collapsed to one
unique: bool,
// decides, per input file, whether its first line needs header handling
checker: HeaderChecker,
// raw data. never resized smaller, so use data_used for real size
data: Vec<u8>,
// bytes of real data in Vec
data_used: usize,
// bytes of data referenced by ptrs
// a.k.a. offset of first byte not referenced by ptrs
// assert(data_calc <= data_used)
data_calc: usize,
// number of bytes beyond data_calc, known to be free of newlines
// to avoid N^2 craziness with long lines
// assert(data_calc+data_nonl <= data_used)
data_nonl: usize,
}
// Upper bound on the capacity of the data buffer; `Sorter::new` clamps to it.
// NOTE(review): the value is just below u32::MAX, presumably so that offsets
// into the buffer fit the u32 fields of `Item` -- confirm.
const MAX_DATA: usize = 0x0ffffff00;
impl Sorter {
    /// new Sorter
    ///
    /// Half of `max_alloc` is reserved for line data (capped at `MAX_DATA`)
    /// and the other half for `Item` entries. `unique` requests that lines
    /// comparing equal under `cmp` be collapsed to one.
    ///
    /// # Panics
    /// Panics if the temporary directory cannot be created.
    pub fn new(cmp: LineCompList, max_alloc: usize, unique: bool) -> Self {
        let data_size = (max_alloc / 2).min(MAX_DATA);
        let ptr_size = max_alloc / 2 / std::mem::size_of::<Item>();
        Self {
            config : SortConfig::default(),
            ptrs: Vec::with_capacity(ptr_size),
            data: Vec::with_capacity(data_size),
            cmp,
            tmp: TempDir::new("sort").unwrap(), // FIXME - new should return Result
            tmp_files: Vec::new(),
            unique,
            checker: HeaderChecker::new(),
            data_used: 0,
            data_calc: 0,
            data_nonl: 0,
        }
    }
    // Debug-only consistency check of the buffer bookkeeping; always returns
    // true so it can be wrapped in debug_assert!.
    fn check(&self) -> bool {
        debug_assert!(self.data_used <= self.data.len());
        debug_assert!(self.data_calc <= self.data_used);
        debug_assert!((self.data_calc + self.data_nonl) <= self.data_used);
        true
    }
    // number of bytes available to write
    fn avail(&self) -> usize {
        self.data.len() - self.data_used
    }
    // try to make N bytes available, return amount actually available
    fn prepare(&mut self, n: usize) -> usize {
        // Never grow past the initial capacity: the buffer is meant to stay
        // within the allocation made in `new`.
        let wanted = (self.data_used + n).min(self.data.capacity());
        if self.data.len() < wanted {
            self.data.resize(wanted, 0);
        }
        self.avail().min(n)
    }
    /// add some more data to be sorted.
    /// must be integer number of lines.
    ///
    /// # Errors
    /// Fails when the buffer cannot hold all of `in_data`.
    pub fn add_data(&mut self, in_data: &[u8]) -> Result<()> {
        let sz = self.prepare(in_data.len());
        if sz != in_data.len() {
            eprintln!("Failed to prepare {}, only got {}", in_data.len(), sz);
            return err!("Badness");
        }
        self.data[self.data_used..self.data_used + in_data.len()].copy_from_slice(in_data);
        self.data_used += in_data.len();
        // FIXME - add newline
        Ok(())
    }
    // Sort the complete lines currently buffered and write them to a new
    // temp file. Fails when the buffer holds no complete line at all, since
    // spilling would then free no space.
    fn spill(&mut self) -> Result<()> {
        self.calc();
        if self.ptrs.is_empty() {
            return err!("Line too long for the sort buffer");
        }
        self.do_sort();
        self.write_tmp()
    }
    /// Add another file's worth of data to the stream
    /// possibly writing temporary files
    ///
    /// Reads `r` to the end in chunks. A missing final newline is added.
    /// Whenever the buffer is full its complete lines are sorted and spilled.
    ///
    /// # Errors
    /// Fails on read or write errors, or when a single line does not fit in
    /// the buffer. Previously a full buffer at the start of a read was
    /// mistaken for end of input in release builds and could index out of
    /// bounds.
    pub fn add(&mut self, mut r: impl Read) -> Result<()> {
        const SIZE: usize = 16 * 1024;
        loop {
            debug_assert!(self.check());
            let sz = self.prepare(SIZE);
            if sz == 0 {
                // No room left: a zero-length read would look like EOF, so
                // free space first and try again.
                self.spill()?;
                continue;
            }
            let nbytes = r.read(&mut self.data[self.data_used..self.data_used + sz])?;
            if nbytes == 0 {
                // sz > 0 guarantees at least one spare byte for the newline.
                if self.data_used > 0 && self.data[self.data_used - 1] != b'\n' {
                    self.data[self.data_used] = b'\n';
                    self.data_used += 1;
                }
                return Ok(());
            }
            self.data_used += nbytes;
            // calc new stuff
            if self.data_used >= self.data.capacity() {
                self.spill()?;
            }
        }
    }
    /// Populate 'ptrs' from 'data'
    fn calc(&mut self) {
        self.ptrs.clear();
        let mut item = Item::new();
        // start of the line currently being scanned
        let mut off: usize = 0;
        for (pos, &byte) in self.data[..self.data_used].iter().enumerate() {
            if byte == b'\n' {
                item.offset = off as u32;
                item.size_plus = (pos - off + 1) as u32;
                off = pos + 1;
                self.cmp.fill_cache_line(&mut item, &self.data);
                self.ptrs.push(item);
            }
        }
        // everything before `off` is now covered by ptrs
        self.data_calc = off;
    }
    /// write ptrs to tmp file
    ///
    /// Afterwards the bytes that were read but not covered by `ptrs` (a
    /// trailing partial line) are moved to the front of the buffer. The
    /// leftover is measured from `data_used`; it used to be measured from
    /// `data.len()`, which could include bytes that were never read. The
    /// writer is flushed so that write errors are reported instead of being
    /// lost when it is dropped.
    fn write_tmp(&mut self) -> Result<()> {
        let mut tmp_file = self.tmp.path().to_owned();
        tmp_file.push(format!("sort_{}.txt", self.tmp_files.len()));
        let tmp_name = tmp_file.to_str().unwrap();
        let mut new_w = get_writer(tmp_name)?;
        for &x in &self.ptrs {
            new_w.write_all(x.get(&self.data))?;
        }
        new_w.flush()?;
        self.tmp_files.push(tmp_name.to_string());
        self.ptrs.clear();
        let leftover = self.data_used - self.data_calc;
        self.data.copy_within(self.data_calc..self.data_used, 0);
        self.data_used = leftover;
        self.data_calc = 0;
        Ok(())
    }
    /// sort and unique self.ptrs
    fn do_sort(&mut self) {
        if self.config.alt_sort {
            do_sort_lines(&self.data, &mut self.ptrs, &mut self.cmp);
        } else {
            self.ptrs.sort_by(|a, b| self.cmp.comp_items(&self.data, a, b));
        }
        if self.unique {
            self.ptrs
                .dedup_by(|a, b| self.cmp.equal_items(&self.data, a, b));
        }
    }
    /// All files have been added, write final results
    ///
    /// If nothing was spilled the sorted lines go straight to `w`; otherwise
    /// the remaining lines are spilled too and all temp files are merged.
    pub fn finalize(&mut self, mut w: impl Write) -> Result<()> {
        self.calc();
        self.do_sort();
        if self.tmp_files.is_empty() {
            for &x in &self.ptrs {
                w.write_all(x.get(&self.data))?;
            }
        } else {
            self.write_tmp()?;
            self.config.merge_t(&self.tmp_files, &mut self.cmp, w, self.unique, &self.tmp)?;
        }
        Ok(())
    }
    // Debugging aid: keep the temp directory on disk and report where it is.
    #[allow(dead_code)]
    fn no_del(self) {
        eprintln!(
            "Not deleting {}",
            self.tmp.into_path().into_os_string().to_string_lossy()
        );
    }
    /// add another file to be sorted
    ///
    /// The first line is read separately. When the header checker accepts it,
    /// column names are derived from it and looked up in the comparator; a
    /// CDX header line is written to `w`, any other first line is treated as
    /// data. The rest of the file is passed to `add`.
    pub fn add_file<W: Write>(&mut self, fname: &str, w: &mut W) -> Result<()> {
        let mut f = get_reader(fname)?;
        let mut first_line = Vec::new();
        let n = f.read_until(b'\n', &mut first_line)?;
        if n == 0 {
            return Ok(());
        }
        if self.checker.check(&first_line, fname)? {
            let s = make_header(&first_line);
            self.cmp.lookup(&s.vec())?;
            if is_cdx(&first_line) {
                w.write_all(&first_line)?;
            } else {
                self.add_data(&first_line)?;
            }
        }
        self.add(&mut *f)
    }
}
/// Owning pointer to a subtree of the merge tree.
type NodeType = Box<MergeTreeItem>;

/// Interior node of the merge tree: two subtrees plus, for each side, the
/// file index most recently pulled from it and not yet handed to the caller.
struct NodeData {
    left: NodeType,
    right: NodeType,
    left_data: Option<usize>,
    right_data: Option<usize>,
    // done : bool, Optimization?
}

impl NodeData {
    /// Build a node with nothing pulled from either side yet.
    fn new(left: NodeType, right: NodeType) -> Self {
        Self {
            left_data: None,
            right_data: None,
            left,
            right,
        }
    }
    /// Current line of the file pending on the left side.
    /// Panics when nothing is pending there.
    fn left_cols<'a>(&self, files: &'a [Reader]) -> &'a TextLine {
        let idx = self.left_data.unwrap();
        files[idx].curr_line()
    }
    /// Current line of the file pending on the right side.
    /// Panics when nothing is pending there.
    fn right_cols<'a>(&self, files: &'a [Reader]) -> &'a TextLine {
        let idx = self.right_data.unwrap();
        files[idx].curr_line()
    }
}
struct LeafData {
file_num: usize,
first: bool,
}
enum MergeTreeItem {
Leaf(LeafData),
Node(NodeData),
}
impl MergeTreeItem {
fn new_tree(files: &[Reader], nums: &[usize]) -> Self {
if nums.is_empty() {
panic!("Can't make a MergeTreeItem from zero files")
} else if nums.len() == 1 {
Self::new_leaf(nums[0])
} else {
let mid = nums.len() / 2;
Self::new_node(
Box::new(Self::new_tree(files, &nums[..mid])),
Box::new(Self::new_tree(files, &nums[mid..])),
)
}
}
fn new_node(left: NodeType, right: NodeType) -> Self {
Self::Node(NodeData::new(left, right))
}
const fn new_leaf(r: usize) -> Self {
Self::Leaf(LeafData {
file_num: r,
first: true,
})
}
fn next(&mut self, cmp: &mut LineCompList, files: &mut [Reader]) -> Result<Option<usize>> {
match self {
Self::Leaf(r) => {
if files[r.file_num].is_done() {
Ok(None)
} else {
if r.first {
r.first = false;
} else if files[r.file_num].getline()? {
return Ok(None);
}
Ok(Some(r.file_num))
}
}
Self::Node(n) => {
if n.left_data.is_none() {
n.left_data = n.left.next(cmp, files)?;
}
if n.right_data.is_none() {
n.right_data = n.right.next(cmp, files)?;
}
if n.left_data.is_none() && n.right_data.is_none() {
Ok(None)
} else if n.left_data.is_none() {
let tmp = n.right_data;
n.right_data = None;
Ok(tmp)
} else if n.right_data.is_none() {
let tmp = n.left_data;
n.left_data = None;
Ok(tmp)
} else {
let c = cmp.comp_cols(n.left_cols(files), n.right_cols(files));
if c == Ordering::Greater {
let tmp = n.right_data;
n.right_data = None;
Ok(tmp)
} else {
let tmp = n.left_data;
n.left_data = None;
Ok(tmp)
}
}
}
}
}
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
/// Print the first screenful of `file` to stdout as an aligned table.
///
/// At most `screen.height` lines (header included) are read. Column widths
/// are the widest cell seen, then shrunk with `dec_max` until the table,
/// including one separating space per column, fits `screen.width`. The
/// header line is centred in its column; data cells are left-aligned and
/// padded.
///
/// Fixes relative to the previous version:
/// * a row with more columns than the header no longer indexes past the end
///   of the width table;
/// * a screen narrower than the number of columns no longer underflows the
///   target width.
pub fn show(file: &str, screen: &Rect) -> Result<()> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(());
    }
    let mut lines: Vec<StringLine> = Vec::new();
    let mut sizes: Vec<usize> = Vec::new();
    sizes.resize(f.header().len(), 0);
    if f.has_header() {
        lines.push(f.header().clone());
    }
    if !f.is_done() {
        while lines.len() < screen.height {
            let mut s = StringLine::new();
            s.line = String::from_utf8_lossy(f.curr().line()).to_string();
            s.split(f.delim());
            lines.push(s);
            if f.getline()? {
                break;
            }
        }
    }
    for x in &lines {
        for (i, c) in x.iter().enumerate() {
            let width = UnicodeWidthStr::width(c);
            // Rows may be wider than the header; grow the table on demand.
            if i >= sizes.len() {
                sizes.resize(i + 1, 0);
            }
            if sizes[i] < width {
                sizes[i] = width;
            }
        }
    }
    let mut total: usize = sizes.iter().sum();
    // One character per column is kept back for the separating space.
    let target = screen.width.saturating_sub(sizes.len());
    while total > target {
        dec_max(&mut sizes);
        total = sizes.iter().sum();
    }
    let mut w = get_writer("-")?;
    // Only the first stored line (the header, when there is one) is centred.
    let mut do_center = f.has_header();
    for x in &lines {
        let mut need_space = false;
        for (c, y) in x.iter().enumerate() {
            let (nstr, width) = y.unicode_truncate(sizes[c]);
            if need_space {
                w.write_all(b" ")?;
            }
            let pad = sizes[c] - width;
            let before = if do_center { pad / 2 } else { 0 };
            for _ in 0..before {
                w.write_all(b" ")?;
            }
            w.write_all(nstr.as_bytes())?;
            for _ in 0..(pad - before) {
                w.write_all(b" ")?;
            }
            need_space = true;
        }
        do_center = false;
        w.write_all(b"\n")?;
    }
    Ok(())
}
/// show the file in a specific rectangle
///
/// Same layout as `show`, but the rendered lines replace the contents of `w`
/// instead of going to stdout. Returns the rendered width: the sum of the
/// column widths plus one per column. An empty file returns 0 and leaves `w`
/// untouched.
///
/// Fixes relative to the previous version:
/// * a row with more columns than the header no longer indexes past the end
///   of the width table;
/// * a rectangle narrower than the number of columns no longer underflows
///   the target width.
pub fn show2(file: &str, screen: &Rect, w: &mut Vec<String>) -> Result<usize> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(0);
    }
    let mut lines: Vec<StringLine> = Vec::new();
    let mut sizes: Vec<usize> = Vec::new();
    sizes.resize(f.header().len(), 0);
    if f.has_header() {
        lines.push(f.header().clone());
    }
    if !f.is_done() {
        while lines.len() < screen.height {
            let mut s = StringLine::new();
            s.line = String::from_utf8_lossy(f.curr().line()).to_string();
            s.split(f.delim());
            lines.push(s);
            if f.getline()? {
                break;
            }
        }
    }
    for x in &lines {
        for (i, c) in x.iter().enumerate() {
            let width = UnicodeWidthStr::width(c);
            // Rows may be wider than the header; grow the table on demand.
            if i >= sizes.len() {
                sizes.resize(i + 1, 0);
            }
            if sizes[i] < width {
                sizes[i] = width;
            }
        }
    }
    let mut total: usize = sizes.iter().sum();
    // One character per column is kept back for the separating space.
    let target = screen.width.saturating_sub(sizes.len());
    while total > target {
        dec_max(&mut sizes);
        total = sizes.iter().sum();
    }
    // Only the first stored line (the header, when there is one) is centred.
    let mut do_center = f.has_header();
    w.clear();
    for x in &lines {
        let mut s = String::new();
        let mut need_space = false;
        for (c, y) in x.iter().enumerate() {
            let (nstr, width) = y.unicode_truncate(sizes[c]);
            if need_space {
                s.push(' ');
            }
            let pad = sizes[c] - width;
            let before = if do_center { pad / 2 } else { 0 };
            for _ in 0..before {
                s.push(' ');
            }
            s.push_str(nstr);
            for _ in 0..(pad - before) {
                s.push(' ');
            }
            need_space = true;
        }
        do_center = false;
        w.push(s);
    }
    Ok(sizes.iter().sum::<usize>() + sizes.len())
}
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
/// Entry point of the column-selection tool.
///
/// Each `--fields`, `--group`, `--expr` and `--composite` argument adds one
/// output column source, in command-line order; `--dups` sets how duplicate
/// column names are handled. For every non-empty input file the output
/// header is rebuilt and written when the header checker asks for it, then
/// every line is written through the column writer.
///
/// The "no columns requested" error used to contain a typo and named options
/// that do not exist; it now lists the real ones.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Select columns", args::FileCount::Many);
    const A: [ArgSpec; 5] = [
        arg! {"fields", "f", "Columns", "the columns to select."},
        arg! {"group", "g", "Columns", "the columns in a bunch, e.g. '.group:1-3'"},
        arg! {"expr", "e", "Name:Expr", "The result of an arithmetic expression"},
        arg! {"composite", "c", "Spec", "new value made from parts. e.g. 'stuff:abc^{two}def'"},
        arg_enum! {"dups", "D", "Mode", "Duplicate Column Handling", &["Fail", "Allow", "Numeric"]},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut header = ColumnHeader::new();
    let mut v = Writer::new(b'\t');
    for x in args {
        if x.name == "dups" {
            header.set_handling(DupColHandling::new(&x.value)?);
        } else if x.name == "fields" {
            v.push(Box::new(ReaderColumns::new(ColumnSet::from_spec(
                &x.value,
            )?)));
        } else if x.name == "group" {
            v.push(Box::new(ColumnClump::from_spec(&x.value)?));
        } else if x.name == "expr" {
            v.push(Box::new(ColumnExpr::new(&x.value)?));
        } else if x.name == "composite" {
            v.push(Box::new(CompositeColumn::new(&x.value)?));
        } else {
            unreachable!();
        }
    }
    if v.is_empty() {
        bail!("cut requires at least one --fields, --group, --expr or --composite");
    }
    let mut w = get_writer("-")?;
    let mut not_header = String::new();
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        v.lookup(&f.names())?;
        header.clear();
        not_header.clear();
        v.add_names(&mut header, f.header())?;
        // Left empty for headerless input.
        if f.has_header() {
            not_header = header.get_head(b'\t');
        }
        if settings.checker.check(not_header.as_bytes(), x)? {
            w.write_all(not_header.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }
        loop {
            v.write(&mut w.0, f.curr())?;
            // getline reports true once the file has run out
            if f.getline()? {
                break;
            }
        }
    }
    Ok(())
}
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
/// Entry point of the pattern-selection tool.
///
/// `--pattern` arguments build the matcher list (combined with AND unless
/// `--or` is given), `--invert` flips the selection, and `--location` adds
/// location columns in front of each selected line. `--show-const` and
/// `--show-func` print their listing and exit. Every selected line of every
/// input file is written to stdout.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Select uniq lines.", args::FileCount::Many);
    const A: [ArgSpec; 6] = [
        arg! {"pattern", "p", "Col,Spec,Pattern", "Select line where this col matches this pattern."},
        arg! {"show-const", "", "", "Print available constants"},
        arg! {"show-func", "", "", "Print available functions"},
        arg! {"or", "o", "", "A line matches if any of the matchers matches."},
        arg! {"invert", "v", "", "Print lines that don't match."},
        arg! {"location", "l", "name:what", "prefix extra columns of location context."},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut list = LineMatcherList::new_with(Combiner::And);
    let mut reverse = false;
    let mut loc = FileLocList::new();
    for x in args {
        match &*x.name {
            "pattern" => {
                list.push(&x.value)?;
            }
            "or" => {
                list.multi = Combiner::Or;
            }
            "invert" => {
                reverse = true;
            }
            "location" => {
                loc.push(&x.value)?;
            }
            "show-const" => {
                expr::show_const();
                return Ok(());
            }
            "show-func" => {
                expr::show_func();
                return Ok(());
            }
            _ => unreachable!(),
        }
    }
    let mut w = get_writer("-")?;
    for x in &files {
        let mut f = Reader::new_open(x)?;
        if f.is_empty() {
            continue;
        }
        list.lookup(&f.names())?;
        // Output header: location columns first, then the file's own columns.
        let mut header = ColumnHeader::new();
        loc.add(&mut header)?;
        header.push_all(f.header())?;
        let not_header = if f.has_header() {
            header.get_head(b'\t')
        } else {
            String::new()
        };
        if settings.checker.check(not_header.as_bytes(), x)? {
            w.write_all(not_header.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }
        loop {
            let selected = list.ok(f.curr_line()) ^ reverse;
            if selected {
                // write previous lines of context if necessary
                loc.write_data(&mut w.0, b'\t', f.loc())?;
                f.write_curr(&mut w.0)?;
            }
            // (unselected lines: write more lines of context if necessary)
            if f.getline()? {
                break;
            }
        }
    }
    Ok(())
}
sourcepub const fn line_number(&self) -> usize
pub const fn line_number(&self) -> usize
line number of curr_line
Examples found in repository
1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932
/// Check that the current line of `f` is correctly ordered after the
/// previous one, reporting a violation on stderr.
///
/// Returns `true` when the pair is bad: the previous line compares greater
/// than the current one, or the two compare equal while `unique` is set.
/// For a bad pair both lines are printed with their line numbers.
pub fn comp_check(f: &Reader, cmp: &mut LineCompList, unique: bool) -> bool {
    let before = f.prev_line(1);
    let current = f.curr_line();
    let bad = match cmp.comp_cols(before, current) {
        Ordering::Less => false,
        Ordering::Equal => {
            if unique {
                eprintln!("Lines are equal when they should be unique.");
            }
            unique
        }
        Ordering::Greater => {
            eprintln!("Lines are out of order");
            true
        }
    };
    if bad {
        eprint!("{} : ", f.line_number() - 1);
        prerr_n(&[before.line()]);
        eprint!("{} : ", f.line_number());
        prerr_n(&[current.line()]);
    }
    bad
}
More examples
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
/// Entry point of the line-sampling tool.
///
/// Exactly one selection mode applies: `--sample N` (roughly N evenly spaced
/// lines), `--for by,from,to` (every `by`-th line number from `from` to `to`)
/// or `--range` (explicit line-number ranges). With none given, the tool
/// samples as many lines as the screen is high. `--header` sets the header
/// checking mode.
///
/// Fixes relative to the previous version:
/// * a file that contains only a header used to end the whole run, silently
///   skipping the remaining files; it is now skipped on its own;
/// * the "No more that one" typo in the error message is corrected.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Sample lines from files.", args::FileCount::Many);
    const A: [ArgSpec; 4] = [
        arg_enum! {"header", "h", "Mode", "header requirements", &HEADER_MODE},
        arg! {"for", "f", "by,from,to", "for i=from; i<=to; i+= by"},
        arg! {"sample", "s", "Number", "Select this number of lines, more or less evenly spaced."},
        arg! {"range", "r", "Ranges", "e.g. 1-5,42,95-106."},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut checker = HeaderChecker::new();
    let mut floop = For::default();
    let mut ranges = Ranges::default();
    let mut sample = 10;
    let mut saw_sample = false;
    let mut saw_for = false;
    let mut saw_range = false;
    for x in args {
        if x.name == "header" {
            checker.mode = HeaderMode::from_str(&x.value)?;
            continue;
        }
        // The three selection modes are mutually exclusive and non-repeatable.
        let is_selector = x.name == "for" || x.name == "range" || x.name == "sample";
        if is_selector && (saw_for || saw_sample || saw_range) {
            return err!("No more than one --sample, --range or --for allowed");
        }
        if x.name == "for" {
            floop = For::new(&x.value)?;
            saw_for = true;
        } else if x.name == "range" {
            saw_range = true;
            for part in x.value.split(',') {
                ranges.push(part)?;
            }
        } else if x.name == "sample" {
            sample = x.value.to_usize_whole(x.value.as_bytes(), "sample size")?;
            saw_sample = true;
        } else {
            unreachable!();
        }
    }
    if !saw_for && !saw_sample && !saw_range {
        saw_sample = true;
        sample = Rect::from_screen().height;
    }
    let mut w = get_writer("-")?;
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        if checker.check_file(&f, x)? {
            w.write_all(f.header().line.as_bytes())?;
        }
        if f.is_done() {
            // Header only: nothing to sample here, but later files still count.
            continue;
        }
        f.do_split(false);
        if saw_sample {
            let mut s = Smooth::new(sample);
            loop {
                s.add(f.curr().line());
                if f.getline()? {
                    break;
                }
            }
            s.finalize(&mut w.0)?;
        } else if saw_for {
            let mut next = floop.from;
            while f.line_number() <= floop.to {
                if f.line_number() == next {
                    w.write_all(f.curr_line().line())?;
                    next += floop.by;
                }
                if f.getline()? {
                    break;
                }
            }
        } else {
            let max = ranges.max();
            while f.line_number() <= max {
                if ranges.contains(f.line_number()) {
                    w.write_all(f.curr_line().line())?;
                }
                if f.getline()? {
                    break;
                }
            }
        }
    }
    Ok(())
}
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
/// Entry point of the file-verification tool.
///
/// Each input file is checked line by line: the column count must match the
/// header, every `--pattern` must hold, and with `--sort` / `--unique`
/// adjacent lines must be ordered under the `--key` comparison. `--first`
/// and `--last` constrain the first and last line. Checking of a file stops
/// once the failure limit is reached; any failure ends the run with a silent
/// error.
///
/// Fix relative to the previous version: the failure limit is declared as
/// `--report`, but only the name `fail` was handled, so `--report` fell
/// through to `unreachable!()`. Both names are accepted now.
///
/// NOTE(review): `--show-matchers` is declared but has no handler in this
/// function and still reaches `unreachable!()`; `or` is handled but not
/// declared.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Verify file contents.", args::FileCount::Many);
    const A: [ArgSpec; 10] = [
        arg! {"report", "r", "Number", "How many failures to report before exit."},
        arg! {"first", "f", "Op,Value", "'FirstLine Op Value' must be true. E.g LT,a for first line is less than 'a'."},
        arg! {"last", "l", "Op,Value", "'LastLine Op Value' must be true."},
        arg! {"key", "k", "Spec", "How to compare adjacent lines"},
        arg! {"sort", "s", "", "Check that the file is sorted."},
        arg! {"unique", "u", "", "Check that the file is sorted, with unique lines."},
        arg! {"pattern", "p", "Col,Spec,Pattern", "Select line where this col matches this pattern."},
        arg! {"show-matchers", "", "", "Print available matchers"},
        arg! {"show-const", "", "", "Print available constants"},
        arg! {"show-func", "", "", "Print available functions"},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut list = LineMatcherList::new_with(Combiner::And);
    let mut comp = LineCompList::new();
    let mut do_sort = false;
    let mut do_unique = false;
    let mut max_fails = 5;
    let mut first: Option<CheckLine> = None;
    let mut last: Option<CheckLine> = None;
    for x in args {
        if x.name == "pattern" {
            list.push(&x.value)?;
        } else if x.name == "key" {
            comp.add(&x.value)?;
        } else if x.name == "or" {
            list.multi = Combiner::Or;
        } else if x.name == "report" || x.name == "fail" {
            max_fails = x.value.to_usize_whole(x.value.as_bytes(), "max fails")?;
        } else if x.name == "sort" {
            do_sort = true;
        } else if x.name == "first" {
            first = Some(CheckLine::new(&x.value)?);
        } else if x.name == "last" {
            last = Some(CheckLine::new(&x.value)?);
        } else if x.name == "unique" {
            do_sort = true;
            do_unique = true;
        } else if x.name == "show-const" {
            expr::show_const();
            return Ok(());
        } else if x.name == "show-func" {
            expr::show_func();
            return Ok(());
        } else {
            unreachable!();
        }
    }
    // Without an explicit key, compare with the default (empty) spec.
    if comp.is_empty() {
        comp.add("")?;
    }
    let mut fails = 0;
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        list.lookup(&f.names())?;
        comp.lookup(&f.names())?;
        if f.is_done() {
            continue;
        }
        if let Some(check) = &first {
            if !check.line_ok_verbose(f.curr_line(), &mut comp, f.line_number())? {
                fails += 1;
            }
        }
        let num_cols = f.names().len();
        loop {
            let mut did_fail = false;
            if f.curr().len() != num_cols {
                eprintln!(
                    "Expected {num_cols} columns, but line {} of {} had {}",
                    f.line_number() + 1,
                    x,
                    f.curr().len()
                );
                did_fail = true;
            }
            if !list.ok_verbose(f.curr_line(), f.line_number(), x) {
                did_fail = true;
            }
            if f.getline()? {
                // End of file: the line just left behind was the last one.
                if let Some(check) = &last {
                    if !check.line_ok_verbose(f.prev_line(1), &mut comp, f.line_number() - 1)? {
                        fails += 1;
                    }
                }
                break;
            }
            if do_sort {
                did_fail = did_fail || comp_check(&f, &mut comp, do_unique);
            }
            if did_fail {
                fails += 1;
                if fails >= max_fails {
                    break;
                }
            }
        }
        if fails > 0 {
            return cdx_err(CdxError::Silent);
        }
    }
    Ok(())
}
sourcepub fn getline(&mut self) -> Result<bool>
pub fn getline(&mut self) -> Result<bool>
get next line of text
Examples found in repository
594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
/// Insert every line of file `fname` into `data`, without its trailing newline.
///
/// Lines of at most one byte are skipped. The last byte of a line is removed
/// only when it really is a newline; it used to be removed unconditionally,
/// which cut off a real character when a line had no terminator. This now
/// matches `load_hashset_c`.
fn load_hashset(data: &mut HashSet<Vec<u8>>, fname: &str) -> Result<()> {
    let mut f = Reader::new();
    f.do_split(false);
    f.open(fname)?;
    if f.is_done() {
        return Ok(());
    }
    loop {
        let mut line: &[u8] = f.curr().line();
        if line.len() > 1 {
            if line.last() == Some(&b'\n') {
                line = &line[..line.len() - 1];
            }
            data.insert(line.to_vec());
        }
        // getline reports true once the file has run out
        if f.getline()? {
            break;
        }
    }
    Ok(())
}
/// pattern is file name. String exactly matches one line of file.
#[derive(Debug, Clone)]
struct FileExactMatch {
    /// lines of the file, as loaded by `load_hashset`
    data: HashSet<Vec<u8>>,
    /// name of the file the lines came from, kept for `show`
    file_name: String,
}

impl FileExactMatch {
    /// Load the lines of `file_name` into a new matcher.
    fn new(file_name: &str) -> Result<Self> {
        let mut lines = HashSet::new();
        load_hashset(&mut lines, file_name)?;
        let file_name = file_name.to_string();
        Ok(Self {
            data: lines,
            file_name,
        })
    }
}
impl Match for FileExactMatch {
    /// True when `buff`, taken as bytes, is one of the loaded lines.
    fn smatch(&self, buff: &str) -> bool {
        let bytes = buff.as_bytes();
        self.data.get(bytes).is_some()
    }
    /// True when `buff` is one of the loaded lines.
    fn umatch(&self, buff: &[u8]) -> bool {
        self.data.get(buff).is_some()
    }
    /// Human-readable description of this matcher.
    fn show(&self) -> String {
        let name = &self.file_name;
        format!("Exact Match of one line in file {}", name)
    }
}
/// Insert the lower-cased form of every line of file `fname` into `data`.
///
/// Lines of at most one byte are skipped and a trailing newline is removed
/// before lower-casing. With `unicode` set the line must be valid UTF-8 and
/// is lower-cased as text; otherwise it is lower-cased as bytes.
fn load_hashset_c(data: &mut HashSet<Vec<u8>>, fname: &str, unicode: bool) -> Result<()> {
    let mut f = Reader::new();
    f.do_split(false);
    f.open(fname)?;
    if f.is_done() {
        return Ok(());
    }
    loop {
        let raw: &[u8] = f.curr().line();
        if raw.len() > 1 {
            let text = match raw.split_last() {
                Some((&b'\n', rest)) => rest,
                _ => raw,
            };
            let lowered = if unicode {
                // PERF - 2 allocations
                String::from_utf8(text.to_vec())?.new_lower().into_bytes()
            } else {
                text.new_lower()
            };
            data.insert(lowered);
        }
        if f.getline()? {
            break;
        }
    }
    Ok(())
}
More examples
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
/// Print `file` to stdout with rows and columns exchanged.
///
/// At most `max_lines` lines are read. One output line is produced per
/// header column: the column name (when the file has a header) followed by
/// that column's value from each line read, tab separated. With `head` set,
/// the output starts with a " CDX" marker and a tab.
pub fn transpose(file: &str, head: bool, max_lines: usize) -> Result<()> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(());
    }
    let mut rows = Vec::new();
    while rows.len() < max_lines {
        rows.push(f.curr().clone());
        if f.getline()? {
            break;
        }
    }
    let mut w = get_writer("-")?;
    if head {
        w.write_all(b" CDX\t")?;
    }
    for i in 0..f.header().len() {
        // A tab goes before every field except the first one on the line.
        let mut need_tab = f.has_header();
        if need_tab {
            w.write_all(f.header()[i].as_bytes())?;
        }
        for row in &rows {
            if need_tab {
                w.write_all(b"\t")?;
            }
            need_tab = true;
            w.write_all(&row[i])?;
        }
        w.write_all(b"\n")?;
    }
    Ok(())
}
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
/// Entry point of the expression-evaluation tool.
///
/// Every line of every input file is parsed as an expression, optionally
/// carrying its own format, and the result is printed to stdout using the
/// current number format; `--fmt` sets the initial one. A format returned by
/// `parse_fmt_expr` stays in effect for the following lines. Evaluation
/// errors are reported on stderr and do not stop processing. Output is
/// flushed after every result.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Evaluate Formatted Expressions.", args::FileCount::Many);
    const A: [ArgSpec; 1] = [arg! {"fmt", "f", "Format", "How to format values."}];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut fmt = NumFormat::default();
    for x in args {
        if x.name != "fmt" {
            unreachable!();
        }
        fmt = NumFormat::new(&x.value)?;
    }
    let mut w = get_writer("-")?;
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() || f.is_done() {
            continue;
        }
        loop {
            let text = String::from_utf8_lossy(f.curr_nl());
            let (next_fmt, exp) = parse_fmt_expr(fmt, &text);
            fmt = next_fmt;
            match calc(exp) {
                Err(e) => eprintln!("{}", e),
                Ok(v) => {
                    fmt.print(v, &mut w.0)?;
                    w.write_all(b"\n")?;
                    w.flush()?;
                }
            }
            if f.getline()? {
                break;
            }
        }
    }
    Ok(())
}
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 
570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607
/// Merge every input file into `w` using a binary heap of file indices.
///
/// Nothing is written for an empty `in_files`. A single file with `unique`
/// unset is copied through unchanged. Otherwise all files are opened, the
/// header of the first file (if it has one) is written once, and the heap
/// repeatedly yields the file whose current line comes next. With `unique`
/// set, a line is written only when it differs from the last line written.
/// `_tmp` is accepted but not used here.
///
/// The readers and the comparator live in a shared `RefCell` because the
/// heap's ordering closure needs them too. No borrow may be held across
/// `heap.push` / `heap.pop`, since those call the closure.
pub fn merge_t2(
    &self,
    in_files: &[String],
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
    _tmp: &TempDir,
) -> Result<()> {
    if in_files.is_empty() {
        return Ok(());
    }
    if !unique && in_files.len() == 1 {
        let r = get_reader(&in_files[0])?;
        return copy(r.0, w);
    }
    let mc = Rc::new(RefCell::new(MergeContext{open : Vec::with_capacity(in_files.len()), cmp}));
    let mut heap = BinaryHeap::new_by(|a: &usize, b: &usize| mc.borrow_mut().compare(*a, *b));
    {
        let mut ctx = mc.borrow_mut();
        for name in in_files {
            ctx.open.push(Reader::new_open(name)?);
        }
        if !ctx.cmp.need_split() {
            for reader in &mut ctx.open {
                reader.do_split(false);
            }
        }
        // FIXME -- Check Header
        if ctx.open[0].has_header() {
            w.write_all(ctx.open[0].header().line.as_bytes())?;
        }
    }
    // Seed the heap with every file that has at least one line.
    for idx in 0..in_files.len() {
        let has_line = !mc.borrow().open[idx].is_done();
        if has_line {
            heap.push(idx);
        }
    }
    if unique {
        let first = match heap.pop() {
            Some(idx) => idx,
            None => return Ok(()),
        };
        let mut prev = mc.borrow().open[first].curr_line().clone();
        let at_eof = mc.borrow_mut().open[first].getline()?;
        if !at_eof {
            heap.push(first);
        }
        w.write_all(prev.line())?;
        while let Some(x) = heap.pop() {
            let eq = mc.borrow_mut().equal(&prev, x);
            if !eq {
                let ctx = mc.borrow();
                let line = ctx.open[x].curr_line();
                w.write_all(line.line())?;
                prev.assign(line);
            }
            let at_eof = mc.borrow_mut().open[x].getline()?;
            if !at_eof {
                heap.push(x);
            }
        }
    } else {
        while let Some(x) = heap.pop() {
            w.write_all(mc.borrow_mut().open[x].curr_line().line())?;
            let at_eof = mc.borrow_mut().open[x].getline()?;
            if !at_eof {
                heap.push(x);
            }
        }
    }
    Ok(())
}
/// Merge every input file into `w`, pulling lines through a `MergeTreeItem` tree.
///
/// Nothing is written for an empty `in_files`. A single file with `unique`
/// unset is copied through unchanged. Otherwise all files are opened, the
/// header of the first file (if it has one) is written once, and lines are
/// emitted in the order the tree yields them. With `unique` set, a line is
/// written only when it does not compare equal to the line pulled just
/// before it. `_tmp` is accepted but not used here.
pub fn merge_t1 (
    &self,
    in_files: &[String],
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
    _tmp: &TempDir,
) -> Result<()> {
    match in_files {
        [] => return Ok(()),
        [only] if !unique => {
            let r = get_reader(only)?;
            return copy(r.0, w);
        }
        _ => {}
    }
    let mut readers: Vec<Reader> = Vec::with_capacity(in_files.len());
    for name in in_files {
        readers.push(Reader::new_open(name)?);
    }
    // Column splitting is switched off when the comparator does not need it.
    if !cmp.need_split() {
        for reader in &mut readers {
            reader.do_split(false);
        }
    }
    // FIXME -- Check Header
    if readers[0].has_header() {
        w.write_all(readers[0].header().line.as_bytes())?;
    }
    let order: Vec<usize> = (0..readers.len()).collect();
    let mut tree = MergeTreeItem::new_tree(&readers, &order);
    if unique {
        let first = match tree.next(cmp, &mut readers)? {
            Some(idx) => idx,
            None => return Ok(()),
        };
        w.write_all(readers[first].curr_line().line())?;
        let mut prev = readers[first].curr_line().clone();
        while let Some(idx) = tree.next(cmp, &mut readers)? {
            let line = readers[idx].curr_line();
            if !cmp.equal_cols(&prev, line) {
                w.write_all(line.line())?;
            }
            prev.assign(line);
        }
    } else {
        while let Some(idx) = tree.next(cmp, &mut readers)? {
            w.write_all(readers[idx].curr_line().line())?;
        }
    }
    Ok(())
}
/// Merge `files` into `w`, choosing the merge implementation from
/// `self.alt_merge`. A temporary directory named "merge" is created for the
/// duration of the call and handed to the chosen implementation.
pub fn merge(&self, files: &[String], cmp: &mut LineCompList, w: impl Write, unique: bool) -> Result<()> {
    let scratch = TempDir::new("merge")?;
    match self.alt_merge {
        true => self.merge_t1(files, cmp, w, unique, &scratch),
        false => self.merge_t2(files, cmp, w, unique, &scratch),
    }
}
/// Given two file names, merge them into `w`.
///
/// Both files are read without column splitting and compared as whole lines
/// with `cmp`. Only the left file's header is written.
///
/// When `unique` is set, each written line is remembered in `prev` and any
/// following lines in either file that compare equal to it are skipped.
/// When it is not set, lines that compare equal are both written (right
/// first, then left).
///
/// # Errors
/// Returns any error from opening or reading either file, from looking up
/// the comparator's columns, or from writing to `w`.
pub fn merge_2(
    &self,
    left: &str,
    right: &str,
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
) -> Result<()> {
    let mut left_file = Reader::new();
    let mut right_file = Reader::new();
    left_file.open(left)?;
    right_file.open(right)?;
    left_file.do_split(false);
    right_file.do_split(false);
    cmp.lookup(&left_file.names())?;
    // FIXME -- Check Header
    if left_file.has_header() {
        w.write_all(left_file.header().line.as_bytes())?;
    }
    if unique {
        let mut prev: Vec<u8> = Vec::new();
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            if ord == Ordering::Less {
                left_file.write(&mut w)?;
                mem::swap(&mut prev, left_file.curr_mut().raw());
                left_file.getline()?;
            } else if ord == Ordering::Greater {
                right_file.write(&mut w)?;
                // Remember the line that was just written, which came from
                // the right file. Swapping with the left buffer here would
                // clobber the left file's pending line and leave the wrong
                // value in `prev`.
                mem::swap(&mut prev, right_file.curr_mut().raw());
                right_file.getline()?;
            } else {
                left_file.write(&mut w)?;
                mem::swap(&mut prev, left_file.curr_mut().raw());
                left_file.getline()?;
                right_file.getline()?;
            }
            // Drop every following line that repeats the one just written.
            while !left_file.is_done() && cmp.equal_lines(left_file.curr().line(), &prev) {
                left_file.getline()?;
            }
            while !right_file.is_done() && cmp.equal_lines(right_file.curr().line(), &prev) {
                right_file.getline()?;
            }
        }
    } else {
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            // if Equal, write both lines
            if ord != Ordering::Less {
                right_file.write(&mut w)?;
                right_file.getline()?;
            }
            if ord != Ordering::Greater {
                left_file.write(&mut w)?;
                left_file.getline()?;
            }
        }
    }
    // At most one of the files still has lines; copy that tail through.
    // NOTE(review): in unique mode the tail is copied without duplicate
    // suppression -- confirm inputs are expected to be unique already.
    while !left_file.is_done() {
        left_file.write(&mut w)?;
        left_file.getline()?;
    }
    while !right_file.is_done() {
        right_file.write(&mut w)?;
        right_file.getline()?;
    }
    Ok(())
}
/// Sort all the files together, into `w`.
///
/// A `Sorter` with a fixed 500000000-byte allocation budget is fed every
/// file in turn and then finalized into `w`. The sorter is given this
/// configuration, so `alt_sort` and `alt_merge` take effect; previously the
/// sorter always ran with `SortConfig::default()` and `self` was ignored.
///
/// # Errors
/// Returns any error from reading an input file or writing the output.
pub fn sort<W: Write>(&self, files: &[String], cmp: LineCompList, w: &mut W, unique: bool) -> Result<()> // maybe return some useful stats?
{
    let mut s = Sorter::new(cmp, 500000000, unique);
    s.config = *self;
    for fname in files {
        s.add_file(fname, w)?;
    }
    s.finalize(w)?;
    // s.no_del();
    Ok(())
}
}
/// Large block of text and pointers to lines therein.
///
/// Input bytes are appended to `data`; `calc` turns them into one `Item` per
/// line in `ptrs`, which is then sorted and either written out directly or
/// spilled to a temporary file for a later merge.
#[allow(missing_debug_implementations)]
pub struct Sorter {
// selects the sort and merge implementations
config : SortConfig,
// one entry per complete line in `data`, rebuilt by `calc`
ptrs: Vec<Item>,
// comparator used for sorting, de-duplication and merging
cmp: LineCompList,
// directory that receives the spill files
tmp: TempDir,
// names of the spill files written so far
tmp_files: Vec<String>,
// when true, lines that compare equal are collapsed
unique: bool,
// consulted with the first line of every added file
checker: HeaderChecker,
// raw data. never resized smaller, so use data_used for real size
data: Vec<u8>,
// bytes of real data in Vec
data_used: usize,
// bytes of data referenced by ptrs
// a.k.a. offset of first byte not referenced by ptrs
// assert(data_calc <= data_used)
data_calc: usize,
// number of bytes beyond data_calc, known to be free of newlines
// to avoid N^2 craziness with long lines
// assert(data_calc+data_nonl <= data_used)
// NOTE(review): only initialized and asserted in the code visible here
data_nonl: usize,
}
// Upper bound on the capacity of `Sorter::data`, applied in `Sorter::new`.
// The value is just below 2^32; presumably chosen because `calc` stores
// line offsets and sizes as u32 -- TODO confirm.
const MAX_DATA: usize = 0x0ffffff00;
impl Sorter {
/// Build an empty `Sorter`.
///
/// Half of `max_alloc` is reserved for line pointers and the other half,
/// capped at `MAX_DATA`, for the raw text buffer. Panics if the temporary
/// directory cannot be created.
pub fn new(cmp: LineCompList, max_alloc: usize, unique: bool) -> Self {
    let half = max_alloc / 2;
    let data_size = half.min(MAX_DATA);
    let ptr_size = half / std::mem::size_of::<Item>();
    Self {
        config: SortConfig::default(),
        ptrs: Vec::with_capacity(ptr_size),
        data: Vec::with_capacity(data_size),
        cmp,
        tmp: TempDir::new("sort").unwrap(), // FIXME - new should return Result
        tmp_files: Vec::new(),
        unique,
        checker: HeaderChecker::new(),
        data_used: 0,
        data_calc: 0,
        data_nonl: 0,
    }
}
// Debug-build sanity check of the buffer bookkeeping:
// data_used <= data.len(), data_calc <= data_used and
// data_calc + data_nonl <= data_used.
// Always returns true so that it can be wrapped in `debug_assert!`.
fn check(&self) -> bool {
debug_assert!(self.data_used <= self.data.len());
debug_assert!(self.data_calc <= self.data_used);
debug_assert!((self.data_calc + self.data_nonl) <= self.data_used);
true
}
// Bytes of `data` that are allocated and initialized but not yet used.
fn avail(&self) -> usize {
    let initialized = self.data.len();
    initialized - self.data_used
}
// Grow the initialized part of `data` so that up to `n` more bytes can be
// written, never exceeding the vector's capacity. Returns how many bytes
// are actually writable, which is at most `n`.
fn prepare(&mut self, n: usize) -> usize {
    let wanted_len = (self.data_used + n).min(self.data.capacity());
    if wanted_len > self.data.len() {
        self.data.resize(wanted_len, 0);
    }
    self.avail().min(n)
}
/// Append `in_data` to the buffer of text to be sorted.
/// The caller must pass a whole number of lines.
///
/// Fails with "Badness" (after a diagnostic on stderr) when the buffer
/// cannot make room for all of `in_data`.
pub fn add_data(&mut self, in_data: &[u8]) -> Result<()> {
    let wanted = in_data.len();
    let granted = self.prepare(wanted);
    if granted != wanted {
        eprintln!("Failed to prepare {}, only got {}", wanted, granted);
        return err!("Badness");
    }
    let start = self.data_used;
    let end = start + wanted;
    self.data[start..end].copy_from_slice(in_data);
    self.data_used = end;
    // FIXME - add newline
    Ok(())
}
/// Read everything from `r` into the buffer, spilling a sorted temporary
/// file each time the buffer reaches its capacity.
///
/// At end of input a newline is appended if the buffered data does not
/// already end with one.
pub fn add(&mut self, mut r: impl Read) -> Result<()> {
    const SIZE: usize = 16 * 1024;
    loop {
        debug_assert!(self.check());
        let sz = self.prepare(SIZE);
        debug_assert!(sz > 0);
        let start = self.data_used;
        let nbytes = r.read(&mut self.data[start..start + sz])?;
        if nbytes == 0 {
            // A zero-length read is treated as end of input.
            let terminated = start == 0 || self.data[start - 1] == b'\n';
            if !terminated {
                self.data[start] = b'\n';
                self.data_used += 1;
            }
            return Ok(());
        }
        self.data_used += nbytes;
        // calc new stuff
        if self.data_used < self.data.capacity() {
            continue;
        }
        self.calc();
        self.do_sort();
        self.write_tmp()?;
    }
}
/// Rebuild `ptrs` from the used part of `data`: one `Item` per
/// newline-terminated line. `data_calc` is left at the offset just past the
/// last newline found.
fn calc(&mut self) {
    self.ptrs.clear();
    let mut item = Item::new();
    let mut line_start: usize = 0;
    for (pos, &byte) in self.data[0..self.data_used].iter().enumerate() {
        if byte != b'\n' {
            continue;
        }
        item.offset = line_start as u32;
        // size includes the newline itself
        item.size_plus = (pos - line_start + 1) as u32;
        line_start = pos + 1;
        self.cmp.fill_cache_line(&mut item, &self.data);
        self.ptrs.push(item);
    }
    self.data_calc = line_start;
}
/// write ptrs to tmp file
fn write_tmp(&mut self) -> Result<()> {
let mut tmp_file = self.tmp.path().to_owned();
tmp_file.push(format!("sort_{}.txt", self.tmp_files.len()));
let tmp_name = tmp_file.to_str().unwrap();
let mut new_w = get_writer(tmp_name)?;
for &x in &self.ptrs {
new_w.write_all(x.get(&self.data))?;
}
self.tmp_files.push(tmp_name.to_string());
self.ptrs.clear();
let nsize = self.data.len() - self.data_calc;
for i in 0..nsize {
self.data[i] = self.data[self.data_calc + i];
}
self.data_used = nsize;
self.data_calc = 0;
Ok(())
}
/// Sort `ptrs` with the configured algorithm and, in unique mode, collapse
/// neighbouring entries that compare equal.
fn do_sort(&mut self) {
    let data = &self.data;
    let cmp = &mut self.cmp;
    let ptrs = &mut self.ptrs;
    if self.config.alt_sort {
        do_sort_lines(data, &mut *ptrs, &mut *cmp);
    } else {
        ptrs.sort_by(|a, b| cmp.comp_items(data, a, b));
    }
    if self.unique {
        ptrs.dedup_by(|a, b| cmp.equal_items(data, a, b));
    }
}
/// Finish the sort once every input has been added.
///
/// If nothing was spilled, the sorted lines are written straight to `w`.
/// Otherwise the remaining lines are spilled too and all temporary files
/// are merged into `w`.
pub fn finalize(&mut self, mut w: impl Write) -> Result<()> {
    self.calc();
    self.do_sort();
    if !self.tmp_files.is_empty() {
        self.write_tmp()?;
        return self.config.merge_t(&self.tmp_files, &mut self.cmp, w, self.unique, &self.tmp);
    }
    for &entry in &self.ptrs {
        w.write_all(entry.get(&self.data))?;
    }
    Ok(())
}
// Debugging aid: consume the sorter, keep its temporary directory on disk
// and report the path on stderr.
#[allow(dead_code)]
fn no_del(self) {
    let kept = self.tmp.into_path().into_os_string();
    eprintln!("Not deleting {}", kept.to_string_lossy());
}
/// Add the contents of the file `fname` to the data being sorted.
///
/// The first line is read separately and shown to the header checker. When
/// the checker accepts it, the comparator's columns are looked up against
/// it, and the line is either written to `w` (when `is_cdx` recognizes it)
/// or added as ordinary data. The rest of the file goes through `add`.
/// A file with no bytes is ignored.
pub fn add_file<W: Write>(&mut self, fname: &str, w: &mut W) -> Result<()> {
    let mut f = get_reader(fname)?;
    let mut first_line = Vec::new();
    if f.read_until(b'\n', &mut first_line)? == 0 {
        return Ok(());
    }
    let accepted = self.checker.check(&first_line, fname)?;
    if accepted {
        let header = make_header(&first_line);
        self.cmp.lookup(&header.vec())?;
        match is_cdx(&first_line) {
            true => w.write_all(&first_line)?,
            false => self.add_data(&first_line)?,
        }
    }
    self.add(&mut *f)
}
}
// Owned, heap-allocated child of a merge-tree node.
type NodeType = Box<MergeTreeItem>;
// Interior node of the merge tree: two subtrees plus the candidate most
// recently obtained from each.
struct NodeData {
// left subtree
left: NodeType,
// right subtree
right: NodeType,
// index of the file offered by the left subtree, None when a new one must be fetched
left_data: Option<usize>,
// index of the file offered by the right subtree, None when a new one must be fetched
right_data: Option<usize>,
// done : bool, Optimization?
}
impl NodeData {
    // Node over two subtrees with no candidate fetched yet.
    fn new(left: NodeType, right: NodeType) -> Self {
        Self {
            left_data: None,
            right_data: None,
            left,
            right,
        }
    }

    // Current line of the file offered by the left subtree.
    // Panics when no left candidate is pending.
    fn left_cols<'a>(&self, files: &'a [Reader]) -> &'a TextLine {
        let idx = self.left_data.unwrap();
        files[idx].curr_line()
    }

    // Current line of the file offered by the right subtree.
    // Panics when no right candidate is pending.
    fn right_cols<'a>(&self, files: &'a [Reader]) -> &'a TextLine {
        let idx = self.right_data.unwrap();
        files[idx].curr_line()
    }
}
// Leaf of the merge tree: a single input file.
struct LeafData {
// index of this leaf's reader in the `files` slice passed to `next`
file_num: usize,
// true until the first `next` call, which returns the reader's
// current line without calling getline
first: bool,
}
// Binary tree used by merge_t1 to pick the next line among many files.
enum MergeTreeItem {
// a single input file
Leaf(LeafData),
// picks between the candidates of two subtrees
Node(NodeData),
}
impl MergeTreeItem {
// Build a tree over the file indices in `nums`, splitting the list in half
// at every level. Panics when `nums` is empty.
fn new_tree(files: &[Reader], nums: &[usize]) -> Self {
    match nums {
        [] => panic!("Can't make a MergeTreeItem from zero files"),
        [only] => Self::new_leaf(*only),
        _ => {
            let (lower, upper) = nums.split_at(nums.len() / 2);
            let left = Box::new(Self::new_tree(files, lower));
            let right = Box::new(Self::new_tree(files, upper));
            Self::new_node(left, right)
        }
    }
}
// Wrap two subtrees in an interior node.
fn new_node(left: NodeType, right: NodeType) -> Self {
    let inner = NodeData::new(left, right);
    Self::Node(inner)
}
// Leaf for file index `r`; its first `next` call will not advance the reader.
const fn new_leaf(r: usize) -> Self {
    let leaf = LeafData {
        file_num: r,
        first: true,
    };
    Self::Leaf(leaf)
}
// Return the index of the file whose current line comes next, or None when
// this subtree is exhausted.
//
// A leaf hands out its reader's current line on the first call and advances
// the reader on every later call. A node refills whichever candidate is
// missing, then gives away the right candidate only when the left one
// compares Greater; on ties the left candidate wins.
fn next(&mut self, cmp: &mut LineCompList, files: &mut [Reader]) -> Result<Option<usize>> {
    match self {
        Self::Leaf(leaf) => {
            let reader = &mut files[leaf.file_num];
            if reader.is_done() {
                return Ok(None);
            }
            if leaf.first {
                leaf.first = false;
            } else if reader.getline()? {
                return Ok(None);
            }
            Ok(Some(leaf.file_num))
        }
        Self::Node(node) => {
            if node.left_data.is_none() {
                node.left_data = node.left.next(cmp, files)?;
            }
            if node.right_data.is_none() {
                node.right_data = node.right.next(cmp, files)?;
            }
            let take_right = match (node.left_data, node.right_data) {
                (None, None) => return Ok(None),
                (None, Some(_)) => true,
                (Some(_), None) => false,
                (Some(_), Some(_)) => {
                    cmp.comp_cols(node.left_cols(files), node.right_cols(files))
                        == Ordering::Greater
                }
            };
            let chosen = if take_right {
                node.right_data.take()
            } else {
                node.left_data.take()
            };
            Ok(chosen)
        }
    }
}
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
/// Print the first screenful of `file` to the "-" writer as aligned columns.
///
/// At most `screen.height` lines are read, the header included when the
/// file has one. Each column is as wide as its widest cell, and the widest
/// columns are shrunk with `dec_max` until the total fits in
/// `screen.width` minus one separator per column. Header cells are
/// centered; data cells are left-aligned and space-padded.
///
/// Rows with more columns than the header extend the width table instead
/// of indexing past its end, and a screen narrower than the column count
/// yields a target width of zero instead of an arithmetic underflow.
///
/// # Errors
/// Returns any error from opening or reading `file` or writing the output.
pub fn show(file: &str, screen: &Rect) -> Result<()> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(());
    }
    let mut lines: Vec<StringLine> = Vec::new();
    let mut sizes: Vec<usize> = vec![0; f.header().len()];
    if f.has_header() {
        lines.push(f.header().clone());
    }
    if !f.is_done() {
        while lines.len() < screen.height {
            let mut s = StringLine::new();
            s.line = String::from_utf8_lossy(f.curr().line()).to_string();
            s.split(f.delim());
            lines.push(s);
            if f.getline()? {
                break;
            }
        }
    }
    // Widest cell per column.
    for x in &lines {
        for (i, c) in x.iter().enumerate() {
            if i >= sizes.len() {
                sizes.resize(i + 1, 0);
            }
            let width = UnicodeWidthStr::width(c);
            if sizes[i] < width {
                sizes[i] = width;
            }
        }
    }
    let mut total: usize = sizes.iter().sum();
    let target = screen.width.saturating_sub(sizes.len());
    while total > target {
        dec_max(&mut sizes);
        total = sizes.iter().sum();
    }
    let mut w = get_writer("-")?;
    // Only the first row (the header, if any) is centered.
    let mut do_center = f.has_header();
    for x in &lines {
        let mut need_space = false;
        for (c, y) in x.iter().enumerate() {
            let (nstr, width) = y.unicode_truncate(sizes[c]);
            if need_space {
                w.write_all(b" ")?;
            }
            let num = (sizes[c] - width) / 2;
            if do_center {
                for _ in 0..num {
                    w.write_all(b" ")?;
                }
            }
            w.write_all(nstr.as_bytes())?;
            if do_center {
                let num2 = (sizes[c] - width) - num;
                for _ in 0..num2 {
                    w.write_all(b" ")?;
                }
            } else {
                for _ in width..sizes[c] {
                    w.write_all(b" ")?;
                }
            }
            need_space = true;
        }
        do_center = false;
        w.write_all(b"\n")?;
    }
    Ok(())
}
/// Render the first screenful of `file` as aligned columns into `w`.
///
/// `w` is cleared and receives one string per displayed line (no trailing
/// newline). At most `screen.height` lines are read, the header included
/// when the file has one. Column widths are shrunk with `dec_max` until
/// they fit in `screen.width` minus one separator per column. Header cells
/// are centered; data cells are left-aligned and space-padded.
///
/// Returns the sum of the final column widths plus the number of columns,
/// or 0 for an empty file (in which case `w` is left untouched).
///
/// Rows with more columns than the header extend the width table instead
/// of indexing past its end, and a screen narrower than the column count
/// yields a target width of zero instead of an arithmetic underflow.
///
/// # Errors
/// Returns any error from opening or reading `file`.
pub fn show2(file: &str, screen: &Rect, w: &mut Vec<String>) -> Result<usize> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(0);
    }
    let mut lines: Vec<StringLine> = Vec::new();
    let mut sizes: Vec<usize> = vec![0; f.header().len()];
    if f.has_header() {
        lines.push(f.header().clone());
    }
    if !f.is_done() {
        while lines.len() < screen.height {
            let mut s = StringLine::new();
            s.line = String::from_utf8_lossy(f.curr().line()).to_string();
            s.split(f.delim());
            lines.push(s);
            if f.getline()? {
                break;
            }
        }
    }
    // Widest cell per column.
    for x in &lines {
        for (i, c) in x.iter().enumerate() {
            if i >= sizes.len() {
                sizes.resize(i + 1, 0);
            }
            let width = UnicodeWidthStr::width(c);
            if sizes[i] < width {
                sizes[i] = width;
            }
        }
    }
    let mut total: usize = sizes.iter().sum();
    let target = screen.width.saturating_sub(sizes.len());
    while total > target {
        dec_max(&mut sizes);
        total = sizes.iter().sum();
    }
    // Only the first row (the header, if any) is centered.
    let mut do_center = f.has_header();
    w.clear();
    for x in &lines {
        let mut s = String::new();
        let mut need_space = false;
        for (c, y) in x.iter().enumerate() {
            let (nstr, width) = y.unicode_truncate(sizes[c]);
            if need_space {
                s.push(' ');
            }
            let num = (sizes[c] - width) / 2;
            if do_center {
                for _ in 0..num {
                    s.push(' ');
                }
            }
            s.push_str(nstr);
            if do_center {
                let num2 = (sizes[c] - width) - num;
                for _ in 0..num2 {
                    s.push(' ');
                }
            } else {
                for _ in width..sizes[c] {
                    s.push(' ');
                }
            }
            need_space = true;
        }
        do_center = false;
        w.push(s);
    }
    Ok(sizes.iter().sum::<usize>() + sizes.len())
}
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
/// Entry point of the column-selection tool.
///
/// Parses the arguments, builds the list of output columns from the
/// `--fields`, `--group`, `--expr` and `--composite` options, then for every
/// input file writes the (checked) header followed by the selected columns
/// of each line to the "-" writer. Empty files are skipped.
///
/// # Errors
/// Fails when no output column was requested, when a column specification
/// cannot be parsed or resolved against a file's header, or on I/O errors.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Select columns", args::FileCount::Many);
    const A: [ArgSpec; 5] = [
        arg! {"fields", "f", "Columns", "the columns to select."},
        arg! {"group", "g", "Columns", "the columns in a bunch, e.g. '.group:1-3'"},
        arg! {"expr", "e", "Name:Expr", "The result of an arithmetic expression"},
        arg! {"composite", "c", "Spec", "new value made from parts. e.g. 'stuff:abc^{two}def'"},
        arg_enum! {"dups", "D", "Mode", "Duplicate Column Handling", &["Fail", "Allow", "Numeric"]},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut header = ColumnHeader::new();
    let mut v = Writer::new(b'\t');
    for x in args {
        if x.name == "dups" {
            header.set_handling(DupColHandling::new(&x.value)?);
        } else if x.name == "fields" {
            v.push(Box::new(ReaderColumns::new(ColumnSet::from_spec(
                &x.value,
            )?)));
        } else if x.name == "group" {
            v.push(Box::new(ColumnClump::from_spec(&x.value)?));
        } else if x.name == "expr" {
            v.push(Box::new(ColumnExpr::new(&x.value)?));
        } else if x.name == "composite" {
            v.push(Box::new(CompositeColumn::new(&x.value)?));
        } else {
            unreachable!();
        }
    }
    if v.is_empty() {
        // The message names the options that actually exist in `A`.
        bail!("cut requires at least one --fields, --group, --expr or --composite");
    }
    let mut w = get_writer("-")?;
    let mut not_header = String::new();
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        v.lookup(&f.names())?;
        header.clear();
        not_header.clear();
        v.add_names(&mut header, f.header())?;
        if f.has_header() {
            not_header = header.get_head(b'\t');
        }
        if settings.checker.check(not_header.as_bytes(), x)? {
            w.write_all(not_header.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }
        loop {
            v.write(&mut w.0, f.curr())?;
            if f.getline()? {
                break;
            }
        }
    }
    Ok(())
}
[source] pub fn curr_line(&self) -> &TextLine
pub fn curr_line(&self) -> &TextLine
Get the current line of text.
Examples found in repository
1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131
/// Get the current line's contents without its final byte (the trailing
/// newline). An empty line yields an empty slice instead of underflowing
/// the length.
pub fn curr_nl(&self) -> &[u8] {
    let line = self.curr_line();
    &line.line[0..line.line.len().saturating_sub(1)]
}
/// Get a previous line's contents without its final byte (the trailing
/// newline). `n` is the lookback distance passed to `prev_line`.
///
/// Looking back past the start of the file yields empty lines, so an empty
/// line returns an empty slice instead of underflowing the length.
pub fn prev_nl(&self, n: usize) -> &[u8] {
    let line = self.prev_line(n);
    &line.line[0..line.line.len().saturating_sub(1)]
}
/// Get the column delimiter byte, the one `getline` passes to `split`.
pub const fn delim(&self) -> u8 {
self.cont.delim
}
/// Get the column names, as the fields of the stored header.
pub fn names(&self) -> Vec<&str> {
self.cont.header.vec()
}
/// Write the raw bytes of the current line, newline included, to `w`.
pub fn write(&self, w: &mut impl Write) -> Result<()> {
    let bytes = &self.curr_line().line;
    w.write_all(bytes)?;
    Ok(())
}
/// Open `name` for reading and read its header; `read_header` is given the
/// first line slot to fill.
pub fn open(&mut self, name: &str) -> Result<()> {
    let source = get_reader(name)?;
    self.file = source;
    let first_slot = &mut self.lines[0];
    self.cont.read_header(&mut *self.file, first_slot)
}
/// The full text of the header.
/// NOTE(review): previously documented as "without the trailing newline",
/// but the merge functions write `header().line` with no newline added --
/// confirm which is correct.
pub const fn header_line(&self) -> &String {
&self.cont.header.line
}
/// Was the file zero bytes? Returns the flag stored in the reader's context.
pub const fn is_empty(&self) -> bool {
self.cont.is_empty
}
/// Have we hit EOF? The flag is set by `getline` when a read reports the
/// end of input.
pub const fn is_done(&self) -> bool {
self.cont.is_done
}
/// Line number of the current line; incremented once per `getline` call.
pub const fn line_number(&self) -> usize {
self.loc.line
}
// Count one more line and step `curr` to the next slot of the line ring,
// wrapping to slot 0 at the end.
fn incr(&mut self) {
    self.loc.line += 1;
    let next = self.curr + 1;
    self.curr = if next >= self.lines.len() { 0 } else { next };
}
/// Read the next line of text into the next slot of the line ring.
///
/// The byte counter is advanced by the length of the line being left
/// behind. Returns `true` once the end of input has been reached; otherwise
/// the new line is split into columns when splitting is enabled.
pub fn getline(&mut self) -> Result<bool> {
    let consumed = self.curr().line.len();
    self.loc.bytes += consumed;
    self.incr();
    let slot = &mut self.lines[self.curr];
    let at_eof = slot.read(&mut *self.file)?;
    if at_eof {
        self.cont.is_done = true;
    } else if self.do_split {
        slot.split(self.cont.delim);
    }
    Ok(self.cont.is_done)
}
/// Get the current line of text (the slot of the line ring at `curr`).
pub fn curr_line(&self) -> &TextLine {
&self.lines[self.curr]
}
/// Get the current line of text, mutably.
pub fn curr_mut(&mut self) -> &mut TextLine {
&mut self.lines[self.curr]
}
/// Get the current line of text; same result as `curr_line`.
pub fn curr(&self) -> &TextLine {
&self.lines[self.curr]
}
/// Get the line `lookback` positions before the current one.
/// Looking back from the start of the file shows empty lines.
pub fn prev_line(&self, lookback: usize) -> &TextLine {
    let idx = if lookback <= self.curr {
        self.curr - lookback
    } else {
        // wrap around the ring of stored lines
        self.curr + self.lines.len() - lookback
    };
    &self.lines[idx]
}
/// Write the raw bytes of the current line, newline included, to `w`.
pub fn write_curr(&self, w: &mut impl Write) -> Result<()> {
    let bytes = &self.curr_line().line;
    w.write_all(bytes)?;
    Ok(())
}
More examples
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527
// Compare the current lines of open files `a` and `b`, with the result
// reversed.
fn compare(&mut self, a : usize, b : usize) -> Ordering {
    let lhs = self.open[a].curr_line();
    let rhs = self.open[b].curr_line();
    let forward = self.cmp.comp_cols(lhs, rhs);
    forward.reverse()
}
// Does line `a` compare equal to the current line of open file `b`?
fn equal(&mut self, a : &TextLine, b : usize) -> bool {
    let candidate = self.open[b].curr_line();
    self.cmp.equal_cols(a, candidate)
}
}
/// Sort configuration: selects between the two sort and the two merge
/// implementations. Both flags default to `false`.
#[derive(Copy, Clone, Debug, Default)]
pub struct SortConfig {
/// use a different sort algorithm (`do_sort_lines` instead of `sort_by`)
pub alt_sort : bool,
/// use a different merge algorithm (`merge_t1` instead of `merge_t2`)
pub alt_merge : bool
}
impl SortConfig {
/// Merge `in_files` into `w`, passing `tmp` on to the implementation chosen
/// by `self.alt_merge`. Prints "Merging" to stderr first.
pub fn merge_t(
    &self,
    in_files: &[String],
    cmp: &mut LineCompList,
    w: impl Write,
    unique: bool,
    tmp: &TempDir,
) -> Result<()> {
    eprintln!("Merging");
    match self.alt_merge {
        true => self.merge_t1(in_files, cmp, w, unique, tmp),
        false => self.merge_t2(in_files, cmp, w, unique, tmp),
    }
}
/// Merge all the files into `w` using a heap of file indices.
///
/// Nothing is written for an empty file list, and a single file with
/// `unique == false` is copied through unchanged. Only the first file's
/// header is written. With `unique` set, a line is written only when it
/// does not compare equal to the last line written. `_tmp` is not used.
pub fn merge_t2(
&self,
in_files: &[String],
cmp: &mut LineCompList,
mut w: impl Write,
unique: bool,
_tmp: &TempDir,
) -> Result<()> {
if in_files.is_empty() {
return Ok(());
}
if in_files.len() == 1 && !unique {
let r = get_reader(&in_files[0])?;
return copy(r.0, w);
}
// The open readers and the comparator are shared between this function
// and the heap's comparison closure, hence the Rc<RefCell<..>>.
let mc = Rc::new(RefCell::new(MergeContext{open : Vec::with_capacity(in_files.len()), cmp}));
// The heap holds indices into `open`, ordered by MergeContext::compare.
// No borrow of `mc` may be held while the heap is pushed or popped,
// because the closure calls borrow_mut().
let mut heap = BinaryHeap::new_by(|a: &usize, b: &usize| mc.borrow_mut().compare(*a, *b));
{
let mut mcm = mc.borrow_mut();
for x in in_files {
mcm.open.push(Reader::new_open(x)?);
}
// skip column splitting when the comparator does not need it
if !mcm.cmp.need_split() {
for x in &mut mcm.open {
x.do_split(false);
}
}
// FIXME -- Check Header
if mcm.open[0].has_header() {
w.write_all(mcm.open[0].header().line.as_bytes())?;
}
}
// seed the heap with every file that still has a line
for i in 0..in_files.len() {
if !mc.borrow().open[i].is_done() {
heap.push(i)
}
}
if unique {
if heap.is_empty() {
return Ok(());
}
// the first line is always written; `prev` keeps a copy for comparisons
let first = heap.pop().unwrap();
let mut prev = mc.borrow().open[first].curr_line().clone();
// getline returns true at end of input; only live files go back on the heap
if !mc.borrow_mut().open[first].getline()? {
heap.push(first);
}
w.write_all(prev.line())?;
while !heap.is_empty() {
if let Some(x) = heap.pop() {
let eq = mc.borrow_mut().equal(&prev, x);
if !eq {
let mcm = mc.borrow();
w.write_all(mcm.open[x].curr_line().line())?;
prev.assign(mcm.open[x].curr_line());
}
if !mc.borrow_mut().open[x].getline()? {
heap.push(x);
}
}
}
} else {
while !heap.is_empty() {
if let Some(x) = heap.pop() {
w.write_all(mc.borrow_mut().open[x].curr_line().line())?;
if !mc.borrow_mut().open[x].getline()? {
heap.push(x);
}
}
}
}
Ok(())
}
/// Merge `in_files` into `w` by repeatedly asking a `MergeTreeItem` tree
/// which reader holds the next line.
///
/// Nothing is written for an empty file list, and a single file with
/// `unique == false` is copied through unchanged. Only the first file's
/// header is written. With `unique` set, a line is skipped when
/// `cmp.equal_cols` reports it equal to the line pulled just before it.
/// `_tmp` is accepted but not used here.
pub fn merge_t1(
    &self,
    in_files: &[String],
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
    _tmp: &TempDir,
) -> Result<()> {
    match in_files {
        [] => return Ok(()),
        [only] if !unique => {
            let r = get_reader(only)?;
            return copy(r.0, w);
        }
        _ => {}
    }

    let mut readers: Vec<Reader> = Vec::with_capacity(in_files.len());
    for name in in_files {
        readers.push(Reader::new_open(name)?);
    }
    // Column splitting is switched off when the comparator does not need it.
    if !cmp.need_split() {
        for reader in readers.iter_mut() {
            reader.do_split(false);
        }
    }
    // FIXME -- Check Header
    let lead = &readers[0];
    if lead.has_header() {
        w.write_all(lead.header().line.as_bytes())?;
    }

    let indices: Vec<usize> = (0..readers.len()).collect();
    let mut tree = MergeTreeItem::new_tree(&readers, &indices);

    if !unique {
        while let Some(idx) = tree.next(cmp, &mut readers)? {
            w.write_all(readers[idx].curr_line().line())?;
        }
        return Ok(());
    }

    // Unique mode: the first line is always written, later ones only when
    // they differ from their predecessor.
    let first = match tree.next(cmp, &mut readers)? {
        Some(idx) => idx,
        None => return Ok(()),
    };
    w.write_all(readers[first].curr_line().line())?;
    let mut prev = readers[first].curr_line().clone();
    while let Some(idx) = tree.next(cmp, &mut readers)? {
        let line = readers[idx].curr_line();
        if !cmp.equal_cols(&prev, line) {
            w.write_all(line.line())?;
        }
        prev.assign(line);
    }
    Ok(())
}
/// Merge `files` into `w`, choosing the merge implementation from
/// `self.alt_merge`. A temporary directory named "merge" is created for the
/// duration of the call and handed to the chosen implementation.
pub fn merge(&self, files: &[String], cmp: &mut LineCompList, w: impl Write, unique: bool) -> Result<()> {
    let scratch = TempDir::new("merge")?;
    match self.alt_merge {
        true => self.merge_t1(files, cmp, w, unique, &scratch),
        false => self.merge_t2(files, cmp, w, unique, &scratch),
    }
}
/// Given two file names, merge them into `w`.
///
/// Both files are read without column splitting and compared as whole lines
/// with `cmp`. Only the left file's header is written.
///
/// When `unique` is set, each written line is remembered in `prev` and any
/// following lines in either file that compare equal to it are skipped.
/// When it is not set, lines that compare equal are both written (right
/// first, then left).
///
/// # Errors
/// Returns any error from opening or reading either file, from looking up
/// the comparator's columns, or from writing to `w`.
pub fn merge_2(
    &self,
    left: &str,
    right: &str,
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
) -> Result<()> {
    let mut left_file = Reader::new();
    let mut right_file = Reader::new();
    left_file.open(left)?;
    right_file.open(right)?;
    left_file.do_split(false);
    right_file.do_split(false);
    cmp.lookup(&left_file.names())?;
    // FIXME -- Check Header
    if left_file.has_header() {
        w.write_all(left_file.header().line.as_bytes())?;
    }
    if unique {
        let mut prev: Vec<u8> = Vec::new();
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            if ord == Ordering::Less {
                left_file.write(&mut w)?;
                mem::swap(&mut prev, left_file.curr_mut().raw());
                left_file.getline()?;
            } else if ord == Ordering::Greater {
                right_file.write(&mut w)?;
                // Remember the line that was just written, which came from
                // the right file. Swapping with the left buffer here would
                // clobber the left file's pending line and leave the wrong
                // value in `prev`.
                mem::swap(&mut prev, right_file.curr_mut().raw());
                right_file.getline()?;
            } else {
                left_file.write(&mut w)?;
                mem::swap(&mut prev, left_file.curr_mut().raw());
                left_file.getline()?;
                right_file.getline()?;
            }
            // Drop every following line that repeats the one just written.
            while !left_file.is_done() && cmp.equal_lines(left_file.curr().line(), &prev) {
                left_file.getline()?;
            }
            while !right_file.is_done() && cmp.equal_lines(right_file.curr().line(), &prev) {
                right_file.getline()?;
            }
        }
    } else {
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            // if Equal, write both lines
            if ord != Ordering::Less {
                right_file.write(&mut w)?;
                right_file.getline()?;
            }
            if ord != Ordering::Greater {
                left_file.write(&mut w)?;
                left_file.getline()?;
            }
        }
    }
    // At most one of the files still has lines; copy that tail through.
    // NOTE(review): in unique mode the tail is copied without duplicate
    // suppression -- confirm inputs are expected to be unique already.
    while !left_file.is_done() {
        left_file.write(&mut w)?;
        left_file.getline()?;
    }
    while !right_file.is_done() {
        right_file.write(&mut w)?;
        right_file.getline()?;
    }
    Ok(())
}
/// Sort all the files together, into `w`.
///
/// A `Sorter` with a fixed 500000000-byte allocation budget is fed every
/// file in turn and then finalized into `w`. The sorter is given this
/// configuration, so `alt_sort` and `alt_merge` take effect; previously the
/// sorter always ran with `SortConfig::default()` and `self` was ignored.
///
/// # Errors
/// Returns any error from reading an input file or writing the output.
pub fn sort<W: Write>(&self, files: &[String], cmp: LineCompList, w: &mut W, unique: bool) -> Result<()> // maybe return some useful stats?
{
    let mut s = Sorter::new(cmp, 500000000, unique);
    s.config = *self;
    for fname in files {
        s.add_file(fname, w)?;
    }
    s.finalize(w)?;
    // s.no_del();
    Ok(())
}
}
/// Large block of text and pointers to lines therein.
///
/// Input bytes are appended to `data`; `calc` turns them into one `Item` per
/// line in `ptrs`, which is then sorted and either written out directly or
/// spilled to a temporary file for a later merge.
#[allow(missing_debug_implementations)]
pub struct Sorter {
// selects the sort and merge implementations
config : SortConfig,
// one entry per complete line in `data`, rebuilt by `calc`
ptrs: Vec<Item>,
// comparator used for sorting, de-duplication and merging
cmp: LineCompList,
// directory that receives the spill files
tmp: TempDir,
// names of the spill files written so far
tmp_files: Vec<String>,
// when true, lines that compare equal are collapsed
unique: bool,
// consulted with the first line of every added file
checker: HeaderChecker,
// raw data. never resized smaller, so use data_used for real size
data: Vec<u8>,
// bytes of real data in Vec
data_used: usize,
// bytes of data referenced by ptrs
// a.k.a. offset of first byte not referenced by ptrs
// assert(data_calc <= data_used)
data_calc: usize,
// number of bytes beyond data_calc, known to be free of newlines
// to avoid N^2 craziness with long lines
// assert(data_calc+data_nonl <= data_used)
// NOTE(review): only initialized and asserted in the code visible here
data_nonl: usize,
}
// Upper bound on the capacity of `Sorter::data`, applied in `Sorter::new`.
// The value is just below 2^32; presumably chosen because `calc` stores
// line offsets and sizes as u32 -- TODO confirm.
const MAX_DATA: usize = 0x0ffffff00;
impl Sorter {
/// Build an empty `Sorter`.
///
/// Half of `max_alloc` is reserved for line pointers and the other half,
/// capped at `MAX_DATA`, for the raw text buffer. Panics if the temporary
/// directory cannot be created.
pub fn new(cmp: LineCompList, max_alloc: usize, unique: bool) -> Self {
    let half = max_alloc / 2;
    let data_size = half.min(MAX_DATA);
    let ptr_size = half / std::mem::size_of::<Item>();
    Self {
        config: SortConfig::default(),
        ptrs: Vec::with_capacity(ptr_size),
        data: Vec::with_capacity(data_size),
        cmp,
        tmp: TempDir::new("sort").unwrap(), // FIXME - new should return Result
        tmp_files: Vec::new(),
        unique,
        checker: HeaderChecker::new(),
        data_used: 0,
        data_calc: 0,
        data_nonl: 0,
    }
}
// Debug-build sanity check of the buffer bookkeeping:
// data_used <= data.len(), data_calc <= data_used and
// data_calc + data_nonl <= data_used.
// Always returns true so that it can be wrapped in `debug_assert!`.
fn check(&self) -> bool {
debug_assert!(self.data_used <= self.data.len());
debug_assert!(self.data_calc <= self.data_used);
debug_assert!((self.data_calc + self.data_nonl) <= self.data_used);
true
}
// Bytes of `data` that are allocated and initialized but not yet used.
fn avail(&self) -> usize {
    let initialized = self.data.len();
    initialized - self.data_used
}
// Grow the initialized part of `data` so that up to `n` more bytes can be
// written, never exceeding the vector's capacity. Returns how many bytes
// are actually writable, which is at most `n`.
fn prepare(&mut self, n: usize) -> usize {
    let wanted_len = (self.data_used + n).min(self.data.capacity());
    if wanted_len > self.data.len() {
        self.data.resize(wanted_len, 0);
    }
    self.avail().min(n)
}
/// Append `in_data` to the buffer of text to be sorted.
/// The caller must pass a whole number of lines.
///
/// Fails with "Badness" (after a diagnostic on stderr) when the buffer
/// cannot make room for all of `in_data`.
pub fn add_data(&mut self, in_data: &[u8]) -> Result<()> {
    let wanted = in_data.len();
    let granted = self.prepare(wanted);
    if granted != wanted {
        eprintln!("Failed to prepare {}, only got {}", wanted, granted);
        return err!("Badness");
    }
    let start = self.data_used;
    let end = start + wanted;
    self.data[start..end].copy_from_slice(in_data);
    self.data_used = end;
    // FIXME - add newline
    Ok(())
}
/// Read everything from `r` into the buffer, spilling a sorted temporary
/// file each time the buffer reaches its capacity.
///
/// At end of input a newline is appended if the buffered data does not
/// already end with one.
pub fn add(&mut self, mut r: impl Read) -> Result<()> {
    const SIZE: usize = 16 * 1024;
    loop {
        debug_assert!(self.check());
        let sz = self.prepare(SIZE);
        debug_assert!(sz > 0);
        let start = self.data_used;
        let nbytes = r.read(&mut self.data[start..start + sz])?;
        if nbytes == 0 {
            // A zero-length read is treated as end of input.
            let terminated = start == 0 || self.data[start - 1] == b'\n';
            if !terminated {
                self.data[start] = b'\n';
                self.data_used += 1;
            }
            return Ok(());
        }
        self.data_used += nbytes;
        // calc new stuff
        if self.data_used < self.data.capacity() {
            continue;
        }
        self.calc();
        self.do_sort();
        self.write_tmp()?;
    }
}
/// Rebuild `ptrs` from the used part of `data`: one `Item` per
/// newline-terminated line. `data_calc` is left at the offset just past the
/// last newline found.
fn calc(&mut self) {
    self.ptrs.clear();
    let mut item = Item::new();
    let mut line_start: usize = 0;
    for (pos, &byte) in self.data[0..self.data_used].iter().enumerate() {
        if byte != b'\n' {
            continue;
        }
        item.offset = line_start as u32;
        // size includes the newline itself
        item.size_plus = (pos - line_start + 1) as u32;
        line_start = pos + 1;
        self.cmp.fill_cache_line(&mut item, &self.data);
        self.ptrs.push(item);
    }
    self.data_calc = line_start;
}
/// write ptrs to tmp file
fn write_tmp(&mut self) -> Result<()> {
let mut tmp_file = self.tmp.path().to_owned();
tmp_file.push(format!("sort_{}.txt", self.tmp_files.len()));
let tmp_name = tmp_file.to_str().unwrap();
let mut new_w = get_writer(tmp_name)?;
for &x in &self.ptrs {
new_w.write_all(x.get(&self.data))?;
}
self.tmp_files.push(tmp_name.to_string());
self.ptrs.clear();
let nsize = self.data.len() - self.data_calc;
for i in 0..nsize {
self.data[i] = self.data[self.data_calc + i];
}
self.data_used = nsize;
self.data_calc = 0;
Ok(())
}
/// Sort `ptrs` with the configured algorithm and, in unique mode, collapse
/// neighbouring entries that compare equal.
fn do_sort(&mut self) {
    let data = &self.data;
    let cmp = &mut self.cmp;
    let ptrs = &mut self.ptrs;
    if self.config.alt_sort {
        do_sort_lines(data, &mut *ptrs, &mut *cmp);
    } else {
        ptrs.sort_by(|a, b| cmp.comp_items(data, a, b));
    }
    if self.unique {
        ptrs.dedup_by(|a, b| cmp.equal_items(data, a, b));
    }
}
/// Finish the sort once every input has been added.
///
/// If nothing was spilled, the sorted lines are written straight to `w`.
/// Otherwise the remaining lines are spilled too and all temporary files
/// are merged into `w`.
pub fn finalize(&mut self, mut w: impl Write) -> Result<()> {
    self.calc();
    self.do_sort();
    if !self.tmp_files.is_empty() {
        self.write_tmp()?;
        return self.config.merge_t(&self.tmp_files, &mut self.cmp, w, self.unique, &self.tmp);
    }
    for &entry in &self.ptrs {
        w.write_all(entry.get(&self.data))?;
    }
    Ok(())
}
// Debugging aid: consume the sorter, keep its temporary directory on disk
// and report the path on stderr.
#[allow(dead_code)]
fn no_del(self) {
    let kept = self.tmp.into_path().into_os_string();
    eprintln!("Not deleting {}", kept.to_string_lossy());
}
/// Add the contents of the file `fname` to the data being sorted.
///
/// The first line is read separately and shown to the header checker. When
/// the checker accepts it, the comparator's columns are looked up against
/// it, and the line is either written to `w` (when `is_cdx` recognizes it)
/// or added as ordinary data. The rest of the file goes through `add`.
/// A file with no bytes is ignored.
pub fn add_file<W: Write>(&mut self, fname: &str, w: &mut W) -> Result<()> {
    let mut f = get_reader(fname)?;
    let mut first_line = Vec::new();
    if f.read_until(b'\n', &mut first_line)? == 0 {
        return Ok(());
    }
    let accepted = self.checker.check(&first_line, fname)?;
    if accepted {
        let header = make_header(&first_line);
        self.cmp.lookup(&header.vec())?;
        match is_cdx(&first_line) {
            true => w.write_all(&first_line)?,
            false => self.add_data(&first_line)?,
        }
    }
    self.add(&mut *f)
}
}
// Owned, heap-allocated child of a merge-tree node.
type NodeType = Box<MergeTreeItem>;
// Interior node of the merge tree: two subtrees plus the candidate most
// recently obtained from each.
struct NodeData {
// left subtree
left: NodeType,
// right subtree
right: NodeType,
// index of the file offered by the left subtree, None when none is pending
left_data: Option<usize>,
// index of the file offered by the right subtree, None when none is pending
right_data: Option<usize>,
// done : bool, Optimization?
}
impl NodeData {
/// Build a node from its two children; neither side has a reader index yet.
fn new(left: NodeType, right: NodeType) -> Self {
    let (left_data, right_data) = (None, None);
    Self {
        left,
        right,
        left_data,
        right_data,
    }
}
/// Current line of the reader selected by `left_data`.
/// Panics if `left_data` is `None`.
fn left_cols<'a>(&self, files: &'a [Reader]) -> &'a TextLine {
    let idx = self.left_data.unwrap();
    files[idx].curr_line()
}
/// Current line of the reader selected by `right_data`.
/// Panics if `right_data` is `None`.
fn right_cols<'a>(&self, files: &'a [Reader]) -> &'a TextLine {
    let idx = self.right_data.unwrap();
    files[idx].curr_line()
}
1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932
/// Compare the reader's previous line with its current one and report an
/// ordering problem on stderr.
///
/// Returns `true` when the pair is out of order, or when the lines compare
/// equal and `unique` is requested. In that case both lines are printed,
/// each prefixed with its line number.
pub fn comp_check(f: &Reader, cmp: &mut LineCompList, unique: bool) -> bool {
    let before = f.prev_line(1);
    let current = f.curr_line();
    let complaint = match cmp.comp_cols(before, current) {
        Ordering::Less => None,
        Ordering::Equal if unique => Some("Lines are equal when they should be unique."),
        Ordering::Equal => None,
        Ordering::Greater => Some("Lines are out of order"),
    };
    match complaint {
        None => false,
        Some(msg) => {
            eprintln!("{}", msg);
            eprint!("{} : ", f.line_number() - 1);
            prerr_n(&[before.line()]);
            eprint!("{} : ", f.line_number());
            prerr_n(&[current.line()]);
            true
        }
    }
}
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
/// Entry point: copy to stdout every line of each input file that the
/// matcher list accepts (or rejects, with `--invert`), optionally prefixed
/// with location columns.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Select uniq lines.", args::FileCount::Many);
    const A: [ArgSpec; 6] = [
        arg! {"pattern", "p", "Col,Spec,Pattern", "Select line where this col matches this pattern."},
        arg! {"show-const", "", "", "Print available constants"},
        arg! {"show-func", "", "", "Print available functions"},
        arg! {"or", "o", "", "A line matches if any of the matchers matches."},
        arg! {"invert", "v", "", "Print lines that don't match."},
        arg! {"location", "l", "name:what", "prefix extra columns of location context."},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut matchers = LineMatcherList::new_with(Combiner::And);
    let mut invert = false;
    let mut locations = FileLocList::new();
    for opt in args {
        if opt.name == "show-const" {
            expr::show_const();
            return Ok(());
        } else if opt.name == "show-func" {
            expr::show_func();
            return Ok(());
        } else if opt.name == "invert" {
            invert = true;
        } else if opt.name == "or" {
            matchers.multi = Combiner::Or;
        } else if opt.name == "location" {
            locations.push(&opt.value)?;
        } else if opt.name == "pattern" {
            matchers.push(&opt.value)?;
        } else {
            unreachable!();
        }
    }
    let mut w = get_writer("-")?;
    for fname in &files {
        let mut f = Reader::new_open(fname)?;
        if f.is_empty() {
            continue;
        }
        matchers.lookup(&f.names())?;
        // Output header = location columns followed by the file's own columns.
        let mut header = ColumnHeader::new();
        locations.add(&mut header)?;
        header.push_all(f.header())?;
        let head_text = if f.has_header() {
            header.get_head(b'\t')
        } else {
            String::new()
        };
        if settings.checker.check(head_text.as_bytes(), fname)? {
            w.write_all(head_text.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }
        let mut at_eof = false;
        while !at_eof {
            // A line is printed when its match result differs from `invert`.
            if matchers.ok(f.curr_line()) != invert {
                // write previous lines of context if necessary
                locations.write_data(&mut w.0, b'\t', f.loc())?;
                f.write_curr(&mut w.0)?;
            }
            // (non-selected lines: write more lines of context if necessary)
            at_eof = f.getline()?;
        }
    }
    Ok(())
}
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
/// Entry point: print a subset of the lines of each input file.
///
/// Exactly one selection mode is used:
/// * `--sample N` feeds every line to a `Smooth` of size `N`, which writes
///   its selection at the end of the file,
/// * `--for by,from,to` prints line `from`, then every `by`-th line, up to `to`,
/// * `--range` prints the lines whose numbers fall in the given ranges.
///
/// With none given, sampling is used with the screen height as the size.
///
/// # Errors
/// Fails if more than one of `--sample`, `--range`, `--for` is given, on a
/// malformed argument, or on any I/O error.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Sample lines from files.", args::FileCount::Many);
    const A: [ArgSpec; 4] = [
        arg_enum! {"header", "h", "Mode", "header requirements", &HEADER_MODE},
        arg! {"for", "f", "by,from,to", "for i=from; i<=to; i+= by"},
        arg! {"sample", "s", "Number", "Select this number of lines, more or less evenly spaced."},
        arg! {"range", "r", "Ranges", "e.g. 1-5,42,95-106."},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut checker = HeaderChecker::new();
    let mut floop = For::default();
    let mut ranges = Ranges::default();
    let mut sample = 10;
    let mut saw_sample = false;
    let mut saw_for = false;
    let mut saw_range = false;
    for x in args {
        if x.name == "header" {
            checker.mode = HeaderMode::from_str(&x.value)?;
        } else if x.name == "for" {
            if saw_for || saw_sample || saw_range {
                return err!("No more that one --sample, --range or --for allowed");
            }
            floop = For::new(&x.value)?;
            saw_for = true;
        } else if x.name == "range" {
            if saw_for || saw_sample || saw_range {
                return err!("No more that one --sample, --range or --for allowed");
            }
            saw_range = true;
            for part in x.value.split(',') {
                ranges.push(part)?;
            }
        } else if x.name == "sample" {
            if saw_for || saw_sample || saw_range {
                return err!("No more that one --sample, --range or --for allowed");
            }
            sample = x.value.to_usize_whole(x.value.as_bytes(), "sample size")?;
            saw_sample = true;
        } else {
            unreachable!();
        }
    }
    // No explicit mode: sample about one screenful.
    if !saw_for && !saw_sample && !saw_range {
        saw_sample = true;
        sample = Rect::from_screen().height;
    }
    let mut w = get_writer("-")?;
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        if checker.check_file(&f, x)? {
            w.write_all(f.header().line.as_bytes())?;
        }
        if f.is_done() {
            // This file has no data lines; move on to the next file instead
            // of abandoning the remaining inputs.
            continue;
        }
        f.do_split(false);
        if saw_sample {
            let mut s = Smooth::new(sample);
            loop {
                s.add(f.curr().line());
                if f.getline()? {
                    break;
                }
            }
            s.finalize(&mut w.0)?;
        } else if saw_for {
            // `next` is the line number of the next line to print.
            let mut next = floop.from;
            while f.line_number() <= floop.to {
                if f.line_number() == next {
                    w.write_all(f.curr_line().line())?;
                    next += floop.by;
                }
                if f.getline()? {
                    break;
                }
            }
        } else {
            // Nothing past the largest requested line number can match.
            let max = ranges.max();
            while f.line_number() <= max {
                if ranges.contains(f.line_number()) {
                    w.write_all(f.curr_line().line())?;
                }
                if f.getline()? {
                    break;
                }
            }
        }
    }
    Ok(())
}
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
/// Entry point: verify the contents of each input file.
///
/// Every line is checked for the expected column count and against the
/// `--pattern` matchers. With `--sort` / `--unique`, adjacent lines are
/// also checked for ordering. `--first` / `--last` check the first and last
/// data lines. Problems are printed to stderr as they are found.
///
/// # Errors
/// Returns `CdxError::Silent` after a file in which at least one check
/// failed; also fails on malformed arguments or I/O errors.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Verify file contents.", args::FileCount::Many);
    const A: [ArgSpec; 10] = [
        arg! {"report", "r", "Number", "How many failures to report before exit."},
        arg! {"first", "f", "Op,Value", "'FirstLine Op Value' must be true. E.g LT,a for first line is less than 'a'."},
        arg! {"last", "l", "Op,Value", "'LastLine Op Value' must be true."},
        arg! {"key", "k", "Spec", "How to compare adjacent lines"},
        arg! {"sort", "s", "", "Check that the file is sorted."},
        arg! {"unique", "u", "", "Check that the file is sorted, with unique lines."},
        arg! {"pattern", "p", "Col,Spec,Pattern", "Select line where this col matches this pattern."},
        arg! {"show-matchers", "", "", "Print available matchers"},
        arg! {"show-const", "", "", "Print available constants"},
        arg! {"show-func", "", "", "Print available functions"},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut list = LineMatcherList::new_with(Combiner::And);
    let mut comp = LineCompList::new();
    let mut do_sort = false;
    let mut do_unique = false;
    let mut max_fails = 5;
    let mut first: Option<CheckLine> = None;
    let mut last: Option<CheckLine> = None;
    for x in args {
        if x.name == "pattern" {
            list.push(&x.value)?;
        } else if x.name == "key" {
            comp.add(&x.value)?;
        } else if x.name == "or" {
            list.multi = Combiner::Or;
        } else if x.name == "fail" || x.name == "report" {
            // The option is declared as "report" in the spec above; "fail" is
            // kept so that any existing use of the old name still works.
            max_fails = x.value.to_usize_whole(x.value.as_bytes(), "max fails")?;
        } else if x.name == "sort" {
            do_sort = true;
        } else if x.name == "first" {
            first = Some(CheckLine::new(&x.value)?);
        } else if x.name == "last" {
            last = Some(CheckLine::new(&x.value)?);
        } else if x.name == "unique" {
            do_sort = true;
            do_unique = true;
        } else if x.name == "show-const" {
            expr::show_const();
            return Ok(());
        } else if x.name == "show-func" {
            expr::show_func();
            return Ok(());
        } else {
            // NOTE(review): "show-matchers" is declared in the spec but has no
            // handler here, so it still ends up in this branch.
            unreachable!();
        }
    }
    if comp.is_empty() {
        comp.add("")?;
    }
    let mut fails = 0;
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        list.lookup(&f.names())?;
        comp.lookup(&f.names())?;
        if f.is_done() {
            continue;
        }
        if let Some(check) = &first {
            if !check.line_ok_verbose(f.curr_line(), &mut comp, f.line_number())? {
                fails += 1;
            }
        }
        let num_cols = f.names().len();
        loop {
            let mut did_fail = false;
            if f.curr().len() != num_cols {
                eprintln!(
                    "Expected {num_cols} columns, but line {} of {} had {}",
                    f.line_number() + 1,
                    x,
                    f.curr().len()
                );
                did_fail = true;
            }
            if !list.ok_verbose(f.curr_line(), f.line_number(), x) {
                did_fail = true;
            }
            if f.getline()? {
                // End of file: the line just examined was the last one. Count
                // its failure before leaving, otherwise a bad final line
                // would be reported but not reflected in the result.
                if did_fail {
                    fails += 1;
                }
                if let Some(check) = &last {
                    if !check.line_ok_verbose(f.prev_line(1), &mut comp, f.line_number() - 1)? {
                        fails += 1;
                    }
                }
                break;
            }
            if do_sort {
                // Short-circuit: the order check is skipped for a line that
                // has already failed.
                did_fail = did_fail || comp_check(&f, &mut comp, do_unique);
            }
            if did_fail {
                fails += 1;
                if fails >= max_fails {
                    break;
                }
            }
        }
        if fails > 0 {
            return cdx_err(CdxError::Silent);
        }
    }
    Ok(())
}
sourcepub fn curr_mut(&mut self) -> &mut TextLine
pub fn curr_mut(&mut self) -> &mut TextLine
get current line of text
Examples found in repository
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
/// Merge two already-sorted files into `w`.
///
/// * `left`, `right` - names of the two input files.
/// * `cmp` - comparator; its columns are resolved against the left file.
/// * `w` - destination. The left file's header, if any, is written first.
/// * `unique` - when set, a line is skipped if it compares equal to the
///   line most recently written.
///
/// # Errors
/// Propagates any I/O or column lookup error.
pub fn merge_2(
    &self,
    left: &str,
    right: &str,
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
) -> Result<()> {
    let mut left_file = Reader::new();
    let mut right_file = Reader::new();
    left_file.open(left)?;
    right_file.open(right)?;
    left_file.do_split(false);
    right_file.do_split(false);
    cmp.lookup(&left_file.names())?;
    // FIXME -- Check Header
    if left_file.has_header() {
        w.write_all(left_file.header().line.as_bytes())?;
    }
    if unique {
        // `prev` holds the bytes of the last line written. It is obtained by
        // swapping buffers with the reader whose line was just written, right
        // before that reader advances.
        let mut prev: Vec<u8> = Vec::new();
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            if ord == Ordering::Less {
                left_file.write(&mut w)?;
                mem::swap(&mut prev, left_file.curr_mut().raw());
                left_file.getline()?;
            } else if ord == Ordering::Greater {
                right_file.write(&mut w)?;
                // Take the buffer of the line that was written (the right
                // one). Swapping with the left reader here would clobber its
                // still-pending current line.
                mem::swap(&mut prev, right_file.curr_mut().raw());
                right_file.getline()?;
            } else {
                // Equal: emit the left line once and advance both readers.
                left_file.write(&mut w)?;
                mem::swap(&mut prev, left_file.curr_mut().raw());
                left_file.getline()?;
                right_file.getline()?;
            }
            // Skip further lines on either side that equal the one written.
            while !left_file.is_done() && cmp.equal_lines(left_file.curr().line(), &prev) {
                left_file.getline()?;
            }
            while !right_file.is_done() && cmp.equal_lines(right_file.curr().line(), &prev) {
                right_file.getline()?;
            }
        }
    } else {
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            // if Equal, write both lines
            if ord != Ordering::Less {
                right_file.write(&mut w)?;
                right_file.getline()?;
            }
            if ord != Ordering::Greater {
                left_file.write(&mut w)?;
                left_file.getline()?;
            }
        }
    }
    // One side is exhausted; copy whatever remains of the other.
    while !left_file.is_done() {
        left_file.write(&mut w)?;
        left_file.getline()?;
    }
    while !right_file.is_done() {
        right_file.write(&mut w)?;
        right_file.getline()?;
    }
    Ok(())
}
sourcepub fn curr(&self) -> &TextLine
pub fn curr(&self) -> &TextLine
get current line of text
Examples found in repository
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320
/// Write this output column's value, taken from the current line of the
/// reader it refers to.
fn write(&self, mut w: impl Write, f: &[Reader]) -> Result<()> {
    let reader = &f[self.file];
    let field = reader.curr().get(self.col.num);
    w.write_all(field)?;
    Ok(())
}
/// Write this output column's name, taken from the header of the reader it
/// refers to.
fn write_head(&self, mut w: impl Write, f: &[Reader]) -> Result<()> {
    let reader = &f[self.file];
    let name = reader.header().get(self.col.num);
    w.write_all(name.as_bytes())?;
    Ok(())
}
}
impl JoinConfig {
    /// Create a `JoinConfig` with usable defaults: matches go to `"-"`,
    /// output is tab-delimited and the lookback limit is 100. All other
    /// fields take their `Default` value.
    pub fn new() -> Self {
        Self {
            lookback_limit: 100,
            out_delim: b'\t',
            match_out: String::from("-"),
            ..Self::default()
        }
    }
    /// Run the join described by this configuration.
    pub fn join(&self) -> Result<()> {
        let mut joiner = Joiner::new(self)?;
        joiner.join(self)
    }
}
/// does the actual joining
/// Holds the working state for one join: the open inputs, the key
/// comparator, the output sinks and the list of columns to emit.
struct Joiner {
// one reader per input file, in `config.infiles` order
r: Vec<Reader>,
// compares the key columns of two lines
comp: LineCompList,
// destination for joined (matching) lines
yes_match: Outfile,
// per input file: where to write its non-matching lines, if requested
no_match: Vec<Option<Outfile>>,
// columns making up each joined output line, in output order
out_cols: Vec<OneOutCol>,
}
impl Joiner {
/// Create an empty joiner whose match output is opened from
/// `config.match_out`; readers and columns are filled in by `join`.
fn new(config: &JoinConfig) -> Result<Self> {
    let yes_match = get_writer(&config.match_out)?;
    Ok(Self {
        r: Vec::new(),
        comp: LineCompList::new(),
        yes_match,
        no_match: Vec::new(),
        out_cols: Vec::new(),
    })
}
/// Set up and run the join.
///
/// Opens every input, opens the requested non-match outputs (writing each
/// one's header), builds the key comparator, works out the output columns,
/// writes the output header and then dispatches on the join type.
fn join(&mut self, config: &JoinConfig) -> Result<()> {
    let num_inputs = config.infiles.len();
    if num_inputs < 2 {
        return err!(
            "Join requires at least two input files, {} found",
            num_inputs
        );
    }
    for name in &config.infiles {
        let reader = Reader::new_open(name)?;
        self.r.push(reader);
    }
    // One (initially absent) non-match sink per input file.
    self.no_match
        .extend(std::iter::repeat_with(|| None).take(num_inputs));
    for extra in &config.unmatch_out {
        // File numbers on the command line are 1-based.
        let in_range = extra.file_num >= 1 && extra.file_num <= num_inputs;
        if !in_range {
            return err!(
                "Join had {} input files, but requested non matching lines from file {}",
                num_inputs,
                extra.file_num
            );
        }
        let slot = extra.file_num - 1;
        if self.no_match[slot].is_some() {
            return err!("Multiple uses of --also for file {}", extra.file_num);
        }
        let mut out = get_writer(&extra.file_name)?;
        self.r[slot].write_header(&mut *out)?;
        self.no_match[slot] = Some(out);
    }
    // Key: the configured specs, or column "1" when none were given.
    if config.keys.is_empty() {
        self.comp.push(CompMaker::make_line_comp("1")?);
    } else {
        for key in &config.keys {
            self.comp.push(CompMaker::make_line_comp(key)?);
        }
    }
    for (idx, reader) in self.r.iter().enumerate() {
        self.comp.lookup_n(&reader.names(), idx)?;
    }
    if config.col_specs.is_empty() {
        // Default output: every column of the first file, then the columns
        // of each later file that are not used by the key.
        for (file_idx, reader) in self.r.iter().enumerate() {
            let used = self.comp.used_cols(file_idx);
            let num_cols = reader.names().len();
            for col in 0..num_cols {
                if file_idx == 0 || !used.contains(&col) {
                    self.out_cols.push(OneOutCol::new_plain(file_idx, col));
                }
            }
        }
    } else {
        for spec in &config.col_specs {
            let mut spec = spec.clone();
            if spec.file >= self.r.len() {
                return err!(
                    "{} input files, but file {} referred to as an output column",
                    self.r.len(),
                    spec.file
                );
            }
            spec.cols.lookup(&self.r[spec.file].names())?;
            for col in spec.cols.get_cols() {
                self.out_cols.push(OneOutCol::new(spec.file, col));
            }
        }
    }
    if self.out_cols.is_empty() {
        return err!("No output columns specified");
    }
    if self.r[0].has_header() {
        self.yes_match.write_all(b" CDX")?;
        for col in &self.out_cols {
            self.yes_match.write_all(&[config.out_delim])?;
            col.write_head(&mut *self.yes_match, &self.r)?;
        }
        self.yes_match.0.write_all(&[b'\n'])?;
    }
    if config.jtype != JoinType::Quick {
        return err!("Only quick supported");
    }
    self.join_quick(config)
}
/// Join the first two readers in a single forward pass.
///
/// The current lines of `r[0]` and `r[1]` are compared on the key columns:
/// * equal: the output columns are written to `yes_match`,
/// * `r[0]` smaller: its line goes to `no_match[0]` (if present) and `r[0]` advances,
/// * `r[0]` larger: the line of `r[1]` goes to `no_match[1]` (if present) and `r[1]` advances.
///
/// When either reader runs out, the rest of the other is drained into its
/// non-match output.
/// NOTE(review): only `r[0]` and `r[1]` are consulted here, and the pass
/// presumably relies on both inputs being sorted by the key - confirm.
fn join_quick(&mut self, config: &JoinConfig) -> Result<()> {
if !self.r[0].is_done() && !self.r[1].is_done() {
// `cmp` always holds the comparison of the two current lines.
let mut cmp = self
.comp
.comp_cols_n(self.r[0].curr(), self.r[1].curr(), 0, 1);
'outer: loop {
match cmp {
Ordering::Equal => loop {
// Emit one joined line: the output columns separated by `out_delim`.
self.out_cols[0].write(&mut *self.yes_match, &self.r)?;
for x in &self.out_cols[1..] {
self.yes_match.write_all(&[config.out_delim])?;
x.write(&mut *self.yes_match, &self.r)?;
}
self.yes_match.write_all(&[b'\n'])?;
// Advance the left side first. At EOF the right line has already been
// matched, so step past it before leaving.
if self.r[0].getline()? {
self.r[1].getline()?;
break 'outer;
}
cmp = self
.comp
.comp_cols_n(self.r[0].curr(), self.r[1].curr(), 0, 1);
// While the new left line still equals the same right line, stay in this
// inner loop and emit again. Otherwise move the right side on, recompare
// and return to the outer match.
if cmp != Ordering::Equal {
if self.r[1].getline()? {
break 'outer;
}
cmp = self
.comp
.comp_cols_n(self.r[0].curr(), self.r[1].curr(), 0, 1);
break;
}
},
Ordering::Less => {
// Left line has no partner.
if let Some(x) = &mut self.no_match[0] {
self.r[0].write(&mut x.0)?;
}
// This `break` leaves the 'outer loop.
if self.r[0].getline()? {
break;
}
cmp = self
.comp
.comp_cols_n(self.r[0].curr(), self.r[1].curr(), 0, 1);
}
Ordering::Greater => {
// Right line has no partner.
if let Some(x) = &mut self.no_match[1] {
self.r[1].write(&mut x.0)?;
}
// This `break` leaves the 'outer loop.
if self.r[1].getline()? {
break;
}
cmp = self
.comp
.comp_cols_n(self.r[0].curr(), self.r[1].curr(), 0, 1);
}
}
}
}
// Whatever is left on either side cannot match anything any more.
while !self.r[0].is_done() {
if let Some(x) = &mut self.no_match[0] {
self.r[0].write(&mut x.0)?;
}
self.r[0].getline()?;
}
while !self.r[1].is_done() {
if let Some(x) = &mut self.no_match[1] {
self.r[1].write(&mut x.0)?;
}
self.r[1].getline()?;
}
Ok(())
}
More examples
594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
/// Insert every line of `fname` into `data`, without its trailing newline.
///
/// Lines of length one or less are skipped. The newline is removed only
/// when it is actually present, so a line that does not end in `\n` keeps
/// its last byte, as `load_hashset_c` already does.
///
/// # Errors
/// Propagates any error from opening or reading the file.
fn load_hashset(data: &mut HashSet<Vec<u8>>, fname: &str) -> Result<()> {
    let mut f = Reader::new();
    f.do_split(false);
    f.open(fname)?;
    if f.is_done() {
        return Ok(());
    }
    loop {
        let line: &[u8] = f.curr().line();
        if line.len() > 1 {
            let entry = line.strip_suffix(b"\n").unwrap_or(line);
            data.insert(entry.to_vec());
        }
        if f.getline()? {
            break;
        }
    }
    Ok(())
}
#[derive(Debug, Clone)]
/// pattern is file name. String exactly matches one line of file.
/// The lines of the file are loaded once into a hash set at construction.
struct FileExactMatch {
// the lines loaded from the file by `load_hashset`
data: HashSet<Vec<u8>>,
// name of the file the lines came from; used by `show`
file_name: String,
}
impl FileExactMatch {
    /// Load the lines of `file_name` and build a matcher over them.
    fn new(file_name: &str) -> Result<Self> {
        let mut data = HashSet::new();
        load_hashset(&mut data, file_name)?;
        let file_name = file_name.to_owned();
        Ok(Self { data, file_name })
    }
}
impl Match for FileExactMatch {
    /// True when `buff`, taken as bytes, is one of the loaded lines.
    fn smatch(&self, buff: &str) -> bool {
        self.umatch(buff.as_bytes())
    }
    /// True when `buff` is one of the loaded lines.
    fn umatch(&self, buff: &[u8]) -> bool {
        self.data.contains(buff)
    }
    /// Human-readable description of this matcher.
    fn show(&self) -> String {
        let name = &self.file_name;
        format!("Exact Match of one line in file {}", name)
    }
}
/// Insert the lower-cased form of every line of `fname` into `data`.
///
/// A trailing newline is removed first; lines of length one or less are
/// skipped. With `unicode` set the line must be valid UTF-8 and is lowered
/// as a string, otherwise it is lowered as raw bytes.
fn load_hashset_c(data: &mut HashSet<Vec<u8>>, fname: &str, unicode: bool) -> Result<()> {
    let mut f = Reader::new();
    f.do_split(false);
    f.open(fname)?;
    if f.is_done() {
        return Ok(());
    }
    let mut more = true;
    while more {
        let raw: &[u8] = f.curr().line();
        if raw.len() > 1 {
            let text = raw.strip_suffix(b"\n").unwrap_or(raw);
            let folded = if unicode {
                // PERF - 2 allocations
                String::from_utf8(text.to_vec())?.new_lower().into_bytes()
            } else {
                text.new_lower()
            };
            data.insert(folded);
        }
        more = !f.getline()?;
    }
    Ok(())
}
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
/// Print `file` to stdout with rows and columns exchanged.
///
/// At most `max_lines` lines are read. Each output line holds one input
/// column: its header name first (when the file has a header), then that
/// column's value from every line read, separated by tabs. With `head`
/// set, the output starts with a ` CDX` marker and a tab.
pub fn transpose(file: &str, head: bool, max_lines: usize) -> Result<()> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(());
    }
    let mut rows = Vec::new();
    while rows.len() < max_lines {
        rows.push(f.curr().clone());
        if f.getline()? {
            break;
        }
    }
    let mut w = get_writer("-")?;
    if head {
        w.write_all(b" CDX\t")?;
    }
    for i in 0..f.header().len() {
        let labelled = f.has_header();
        if labelled {
            w.write_all(f.header()[i].as_bytes())?;
        }
        // A separator goes before every field except the first on the line.
        let mut need_tab = labelled;
        for row in &rows {
            if need_tab {
                w.write_all(b"\t")?;
            }
            need_tab = true;
            w.write_all(&row[i])?;
        }
        w.write_all(b"\n")?;
    }
    Ok(())
}
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
/// Print the top of `file` to stdout as an aligned table sized for `screen`.
///
/// Up to `screen.height` lines (header included) are read. Each column is
/// as wide as its widest cell, then columns are narrowed with `dec_max`
/// until the table fits `screen.width`. Header cells are centred; data
/// cells are padded on the right.
pub fn show(file: &str, screen: &Rect) -> Result<()> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(());
    }
    let mut rows: Vec<StringLine> = Vec::new();
    let mut widths: Vec<usize> = vec![0; f.header().len()];
    if f.has_header() {
        rows.push(f.header().clone());
    }
    if !f.is_done() {
        while rows.len() < screen.height {
            let mut row = StringLine::new();
            row.line = String::from_utf8_lossy(f.curr().line()).to_string();
            row.split(f.delim());
            rows.push(row);
            if f.getline()? {
                break;
            }
        }
    }
    // Widest cell per column.
    for row in &rows {
        for (i, cell) in row.iter().enumerate() {
            let width = UnicodeWidthStr::width(cell);
            widths[i] = widths[i].max(width);
        }
    }
    // One position per column is kept for the separating space.
    let mut total: usize = widths.iter().sum();
    let budget = screen.width - widths.len();
    while total > budget {
        dec_max(&mut widths);
        total = widths.iter().sum();
    }
    let mut w = get_writer("-")?;
    // Only the first stored line can be the header.
    let mut do_center = f.has_header();
    for row in &rows {
        for (c, cell) in row.iter().enumerate() {
            let (shown, used) = cell.unicode_truncate(widths[c]);
            if c > 0 {
                w.write_all(b" ")?;
            }
            let slack = widths[c] - used;
            let (lead, trail) = if do_center {
                (slack / 2, slack - slack / 2)
            } else {
                (0, slack)
            };
            for _ in 0..lead {
                w.write_all(b" ")?;
            }
            w.write_all(shown.as_bytes())?;
            for _ in 0..trail {
                w.write_all(b" ")?;
            }
        }
        do_center = false;
        w.write_all(b"\n")?;
    }
    Ok(())
}
/// Render the top of `file` as an aligned table sized for `screen`, one
/// `String` per line, into `w` (which is cleared first).
///
/// Layout is the same as `show`: header cells centred, data cells padded
/// on the right. Returns the sum of the column widths plus the number of
/// columns, or 0 for an empty file.
pub fn show2(file: &str, screen: &Rect, w: &mut Vec<String>) -> Result<usize> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(0);
    }
    let mut rows: Vec<StringLine> = Vec::new();
    let mut widths: Vec<usize> = vec![0; f.header().len()];
    if f.has_header() {
        rows.push(f.header().clone());
    }
    if !f.is_done() {
        while rows.len() < screen.height {
            let mut row = StringLine::new();
            row.line = String::from_utf8_lossy(f.curr().line()).to_string();
            row.split(f.delim());
            rows.push(row);
            if f.getline()? {
                break;
            }
        }
    }
    // Widest cell per column.
    for row in &rows {
        for (i, cell) in row.iter().enumerate() {
            let width = UnicodeWidthStr::width(cell);
            widths[i] = widths[i].max(width);
        }
    }
    // One position per column is kept for the separating space.
    let mut total: usize = widths.iter().sum();
    let budget = screen.width - widths.len();
    while total > budget {
        dec_max(&mut widths);
        total = widths.iter().sum();
    }
    // Only the first stored line can be the header.
    let mut do_center = f.has_header();
    w.clear();
    for row in &rows {
        let mut text = String::new();
        for (c, cell) in row.iter().enumerate() {
            let (shown, used) = cell.unicode_truncate(widths[c]);
            if c > 0 {
                text.push(' ');
            }
            let slack = widths[c] - used;
            let (lead, trail) = if do_center {
                (slack / 2, slack - slack / 2)
            } else {
                (0, slack)
            };
            text.extend(std::iter::repeat(' ').take(lead));
            text.push_str(shown);
            text.extend(std::iter::repeat(' ').take(trail));
        }
        do_center = false;
        w.push(text);
    }
    Ok(widths.iter().sum::<usize>() + widths.len())
}
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
/// Entry point: write the selected and computed columns of every input
/// line to stdout.
///
/// Each `--fields`, `--group`, `--expr` and `--composite` option appends
/// one column source, in command-line order; `--dups` sets how duplicate
/// column names are handled in the output header.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Select columns", args::FileCount::Many);
    const A: [ArgSpec; 5] = [
        arg! {"fields", "f", "Columns", "the columns to select."},
        arg! {"group", "g", "Columns", "the columns in a bunch, e.g. '.group:1-3'"},
        arg! {"expr", "e", "Name:Expr", "The result of an arithmetic expression"},
        arg! {"composite", "c", "Spec", "new value made from parts. e.g. 'stuff:abc^{two}def'"},
        arg_enum! {"dups", "D", "Mode", "Duplicate Column Handling", &["Fail", "Allow", "Numeric"]},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut header = ColumnHeader::new();
    let mut v = Writer::new(b'\t');
    for opt in args {
        if opt.name == "fields" {
            let cols = ColumnSet::from_spec(&opt.value)?;
            v.push(Box::new(ReaderColumns::new(cols)));
        } else if opt.name == "group" {
            let clump = ColumnClump::from_spec(&opt.value)?;
            v.push(Box::new(clump));
        } else if opt.name == "expr" {
            let computed = ColumnExpr::new(&opt.value)?;
            v.push(Box::new(computed));
        } else if opt.name == "composite" {
            let composite = CompositeColumn::new(&opt.value)?;
            v.push(Box::new(composite));
        } else if opt.name == "dups" {
            header.set_handling(DupColHandling::new(&opt.value)?);
        } else {
            unreachable!();
        }
    }
    if v.is_empty() {
        bail!("cut requires at lease one --columns or --groups");
    }
    let mut w = get_writer("-")?;
    for fname in &files {
        let mut f = Reader::new();
        f.open(fname)?;
        if f.is_empty() {
            continue;
        }
        v.lookup(&f.names())?;
        // The output header is rebuilt for every file.
        header.clear();
        v.add_names(&mut header, f.header())?;
        let head_text = if f.has_header() {
            header.get_head(b'\t')
        } else {
            String::new()
        };
        if settings.checker.check(head_text.as_bytes(), fname)? {
            w.write_all(head_text.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }
        let mut at_eof = false;
        while !at_eof {
            v.write(&mut w.0, f.curr())?;
            at_eof = f.getline()?;
        }
    }
    Ok(())
}
sourcepub fn prev_line(&self, lookback: usize) -> &TextLine
pub fn prev_line(&self, lookback: usize) -> &TextLine
Get a previous line of text. Looking back from the start of the file shows empty lines.
Examples found in repository
1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136
/// Bytes of the line `n` steps back, with its final byte removed.
///
/// Looking back from the start of the file gives an empty line; for such a
/// line the result is an empty slice rather than an arithmetic underflow.
pub fn prev_nl(&self, n: usize) -> &[u8] {
    let line = self.prev_line(n);
    // saturating: an empty line has no final byte to drop
    let end = line.line.len().saturating_sub(1);
    &line.line[..end]
}
/// get delimiter
/// (the `delim` byte held in the reader's `cont` state)
pub const fn delim(&self) -> u8 {
self.cont.delim
}
/// get column names
/// Returns the header's columns as a freshly built vector of string slices.
pub fn names(&self) -> Vec<&str> {
self.cont.header.vec()
}
/// Write the bytes of the current line to `w`, newline included.
pub fn write(&self, w: &mut impl Write) -> Result<()> {
    let current = self.curr_line();
    w.write_all(&current.line)?;
    Ok(())
}
/// Open `name` for reading and read its header; the header reader is also
/// handed the first line slot to fill.
pub fn open(&mut self, name: &str) -> Result<()> {
    let source = get_reader(name)?;
    self.file = source;
    let first_slot = &mut self.lines[0];
    self.cont.read_header(&mut *self.file, first_slot)
}
/// The full text of the header, without the trailing newline
/// NOTE(review): returns the header's stored `line` field as-is; whether the
/// newline has really been stripped is not visible here - confirm.
pub const fn header_line(&self) -> &String {
&self.cont.header.line
}
/// was file zero bytes?
/// Reports the `is_empty` flag kept in the reader's `cont` state.
pub const fn is_empty(&self) -> bool {
self.cont.is_empty
}
/// have we hit EOF?
/// Reports the `is_done` flag, which `getline` sets once a read hits end of input.
pub const fn is_done(&self) -> bool {
self.cont.is_done
}
/// line number of curr_line
/// (the `line` counter in `loc`, incremented by one on every `getline`)
pub const fn line_number(&self) -> usize {
self.loc.line
}
/// Step to the next line slot: bump the line counter and advance `curr`,
/// wrapping to slot 0 after the last slot.
fn incr(&mut self) {
    self.loc.line += 1;
    let next = self.curr + 1;
    self.curr = if next >= self.lines.len() { 0 } else { next };
}
/// Read the next line into the next slot.
///
/// The byte count in `loc` grows by the length of the line being left.
/// The new line is split into columns when splitting is enabled. Returns
/// `true` once end of input has been reached.
pub fn getline(&mut self) -> Result<bool> {
    let consumed = self.curr().line.len();
    self.loc.bytes += consumed;
    self.incr();
    let slot = self.curr;
    let hit_eof = self.lines[slot].read(&mut *self.file)?;
    if hit_eof {
        self.cont.is_done = true;
    } else if self.do_split {
        self.lines[slot].split(self.cont.delim);
    }
    Ok(self.cont.is_done)
}
/// get current line of text
/// (shared reference to the slot at index `curr`; same result as `curr`)
pub fn curr_line(&self) -> &TextLine {
&self.lines[self.curr]
}
/// get current line of text
/// (mutable reference to the slot at index `curr`)
pub fn curr_mut(&mut self) -> &mut TextLine {
&mut self.lines[self.curr]
}
/// get current line of text
/// (shared reference to the slot at index `curr`; same result as `curr_line`)
pub fn curr(&self) -> &TextLine {
&self.lines[self.curr]
}
/// Get the line `lookback` steps before the current one.
/// Looking back from the start of the file shows empty lines.
pub fn prev_line(&self, lookback: usize) -> &TextLine {
    // The slots form a ring: step backwards, wrapping past slot 0.
    let slot = match self.curr.checked_sub(lookback) {
        Some(direct) => direct,
        None => self.curr + self.lines.len() - lookback,
    };
    &self.lines[slot]
}
/// Write the bytes of the current line to `w`, newline included.
pub fn write_curr(&self, w: &mut impl Write) -> Result<()> {
    let current = self.curr_line();
    w.write_all(&current.line)?;
    Ok(())
}
/// Write to `w` the bytes of the line `lookback` steps back, newline included.
pub fn write_prev(&self, w: &mut impl Write, lookback: usize) -> Result<()> {
    let earlier = self.prev_line(lookback);
    w.write_all(&earlier.line)?;
    Ok(())
}
More examples
1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932
/// Compare the reader's previous line with its current one and report an
/// ordering problem on stderr.
///
/// Returns `true` when the pair is out of order, or when the lines compare
/// equal and `unique` is requested. In that case both lines are printed,
/// each prefixed with its line number.
pub fn comp_check(f: &Reader, cmp: &mut LineCompList, unique: bool) -> bool {
    let before = f.prev_line(1);
    let current = f.curr_line();
    let complaint = match cmp.comp_cols(before, current) {
        Ordering::Less => None,
        Ordering::Equal if unique => Some("Lines are equal when they should be unique."),
        Ordering::Equal => None,
        Ordering::Greater => Some("Lines are out of order"),
    };
    match complaint {
        None => false,
        Some(msg) => {
            eprintln!("{}", msg);
            eprint!("{} : ", f.line_number() - 1);
            prerr_n(&[before.line()]);
            eprint!("{} : ", f.line_number());
            prerr_n(&[current.line()]);
            true
        }
    }
}
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
/// Entry point: verify the contents of each input file.
///
/// Every line is checked for the expected column count and against the
/// `--pattern` matchers. With `--sort` / `--unique`, adjacent lines are
/// also checked for ordering. `--first` / `--last` check the first and last
/// data lines. Problems are printed to stderr as they are found.
///
/// # Errors
/// Returns `CdxError::Silent` after a file in which at least one check
/// failed; also fails on malformed arguments or I/O errors.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Verify file contents.", args::FileCount::Many);
    const A: [ArgSpec; 10] = [
        arg! {"report", "r", "Number", "How many failures to report before exit."},
        arg! {"first", "f", "Op,Value", "'FirstLine Op Value' must be true. E.g LT,a for first line is less than 'a'."},
        arg! {"last", "l", "Op,Value", "'LastLine Op Value' must be true."},
        arg! {"key", "k", "Spec", "How to compare adjacent lines"},
        arg! {"sort", "s", "", "Check that the file is sorted."},
        arg! {"unique", "u", "", "Check that the file is sorted, with unique lines."},
        arg! {"pattern", "p", "Col,Spec,Pattern", "Select line where this col matches this pattern."},
        arg! {"show-matchers", "", "", "Print available matchers"},
        arg! {"show-const", "", "", "Print available constants"},
        arg! {"show-func", "", "", "Print available functions"},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut list = LineMatcherList::new_with(Combiner::And);
    let mut comp = LineCompList::new();
    let mut do_sort = false;
    let mut do_unique = false;
    let mut max_fails = 5;
    let mut first: Option<CheckLine> = None;
    let mut last: Option<CheckLine> = None;
    for x in args {
        if x.name == "pattern" {
            list.push(&x.value)?;
        } else if x.name == "key" {
            comp.add(&x.value)?;
        } else if x.name == "or" {
            list.multi = Combiner::Or;
        } else if x.name == "fail" || x.name == "report" {
            // The option is declared as "report" in the spec above; "fail" is
            // kept so that any existing use of the old name still works.
            max_fails = x.value.to_usize_whole(x.value.as_bytes(), "max fails")?;
        } else if x.name == "sort" {
            do_sort = true;
        } else if x.name == "first" {
            first = Some(CheckLine::new(&x.value)?);
        } else if x.name == "last" {
            last = Some(CheckLine::new(&x.value)?);
        } else if x.name == "unique" {
            do_sort = true;
            do_unique = true;
        } else if x.name == "show-const" {
            expr::show_const();
            return Ok(());
        } else if x.name == "show-func" {
            expr::show_func();
            return Ok(());
        } else {
            // NOTE(review): "show-matchers" is declared in the spec but has no
            // handler here, so it still ends up in this branch.
            unreachable!();
        }
    }
    if comp.is_empty() {
        comp.add("")?;
    }
    let mut fails = 0;
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        list.lookup(&f.names())?;
        comp.lookup(&f.names())?;
        if f.is_done() {
            continue;
        }
        if let Some(check) = &first {
            if !check.line_ok_verbose(f.curr_line(), &mut comp, f.line_number())? {
                fails += 1;
            }
        }
        let num_cols = f.names().len();
        loop {
            let mut did_fail = false;
            if f.curr().len() != num_cols {
                eprintln!(
                    "Expected {num_cols} columns, but line {} of {} had {}",
                    f.line_number() + 1,
                    x,
                    f.curr().len()
                );
                did_fail = true;
            }
            if !list.ok_verbose(f.curr_line(), f.line_number(), x) {
                did_fail = true;
            }
            if f.getline()? {
                // End of file: the line just examined was the last one. Count
                // its failure before leaving, otherwise a bad final line
                // would be reported but not reflected in the result.
                if did_fail {
                    fails += 1;
                }
                if let Some(check) = &last {
                    if !check.line_ok_verbose(f.prev_line(1), &mut comp, f.line_number() - 1)? {
                        fails += 1;
                    }
                }
                break;
            }
            if do_sort {
                // Short-circuit: the order check is skipped for a line that
                // has already failed.
                did_fail = did_fail || comp_check(&f, &mut comp, do_unique);
            }
            if did_fail {
                fails += 1;
                if fails >= max_fails {
                    break;
                }
            }
        }
        if fails > 0 {
            return cdx_err(CdxError::Silent);
        }
    }
    Ok(())
}
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
/// Entry point of the uniq-style tool: collapses runs of adjacent lines that
/// compare equal under the `--key` comparison, optionally counting or
/// aggregating the merged lines, and writes the result to the "-" writer.
///
/// Returns early with `Ok(())` when the input is empty or has no data lines
/// (the header, if any, is still written in the latter case).
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
let prog = args::ProgSpec::new("Select uniq lines.", args::FileCount::One);
const A: [ArgSpec; 7] = [
arg! {"agg", "a", "Col,Spec", "Merge value from this column, in place."},
arg! {"agg-pre", "", "NewCol,SrcCol,Spec", "Merge value from SrcCol into new column, before other columns."},
arg! {"agg-post", "", "NewCol,SrcCol,Spec", "Merge value from SrcCol into new column, after other columns."},
arg! {"key", "k", "Spec", "How to compare adjacent lines"},
arg! {"count", "c", "ColName,Position", "Write the count of matching line."},
arg! {"which", "w", "(First,Last,Min,Max)[,LineCompare]", "Which of the matching lines should be printed."},
arg! {"agg-help", "", "", "Print help for aggregators"},
];
let (args, files) = args::parse(&prog, &A, argv, settings)?;
let mut agg = LineAggList::new();
let mut comp = LineCompList::new();
let mut count = Count::default();
// NOTE(review): "agg-help" is declared above but has no branch below; unless
// args::parse consumes it itself, passing it would hit unreachable!() -- confirm.
for x in args {
if x.name == "key" {
comp.add(&x.value)?;
} else if x.name == "count" {
count.get_count(&x.value)?;
} else if x.name == "which" {
count.get_which(&x.value)?;
} else if x.name == "agg" {
agg.push_replace(&x.value)?;
} else if x.name == "agg-post" {
agg.push_append(&x.value)?;
} else if x.name == "agg-pre" {
agg.push_prefix(&x.value)?;
} else {
unreachable!();
}
}
assert_eq!(files.len(), 1);
let mut f = Reader::new();
f.open(&files[0])?;
if f.is_empty() {
return Ok(());
}
comp.lookup(&f.names())?;
count.lookup(&f.names())?;
let mut c_write = Writer::new(f.delim());
if !agg.is_empty() {
// With aggregators active, the count column is expressed as one more
// "count" aggregator at the requested position.
if count.pos == CountPos::Begin {
agg.push_first_prefix(&format!("{},1,count", count.name))?;
}
if count.pos == CountPos::End {
agg.push_append(&format!("{},1,count", count.name))?;
}
agg.lookup(&f.names())?;
agg.fill(&mut c_write, f.header());
c_write.lookup(&f.names())?;
}
let mut w = get_writer("-")?;
if f.has_header() {
let mut ch = ColumnHeader::new();
if agg.is_empty() {
if count.pos == CountPos::Begin {
ch.push(&count.name)?;
}
ch.push_all(f.header())?;
if count.pos == CountPos::End {
ch.push(&count.name)?;
}
} else {
c_write.add_names(&mut ch, f.header())?;
}
w.write_all(ch.get_head(f.delim()).as_bytes())?;
}
if f.is_done() {
return Ok(());
}
f.do_split(comp.need_split());
// Number of lines in the current run of equal lines.
let mut matches = 1;
if !agg.is_empty() {
// Aggregating: `tmp` holds the line to emit for the current run.
agg.add(f.curr_line());
let mut tmp = f.curr_line().clone();
loop {
// getline() returns true at end of input: flush the last run and stop.
if f.getline()? {
c_write.write(&mut w.0, &tmp)?;
break;
}
if comp.equal_cols(f.prev_line(1), f.curr_line()) {
count.assign(&mut tmp, f.curr_line());
agg.add(f.curr_line());
} else {
c_write.write(&mut w.0, &tmp)?;
tmp.assign(f.curr_line());
agg.reset();
agg.add(f.curr_line());
}
}
} else if count.which == Which::Last {
// Keep the last line of each run: the previous line is written once the
// run is seen to have ended.
loop {
if f.getline()? {
count.write(&mut w.0, matches, f.prev_line(1).line(), f.delim())?;
break;
}
if comp.equal_cols(f.prev_line(1), f.curr_line()) {
matches += 1;
} else {
count.write(&mut w.0, matches, f.prev_line(1).line(), f.delim())?;
matches = 1;
}
}
} else if count.which == Which::First && count.is_plain() {
// Plain "first of each run": a line is written as soon as it differs from
// its predecessor, so nothing needs to be buffered.
f.write_curr(&mut w.0)?;
loop {
if f.getline()? {
break;
}
if !comp.equal_cols(f.prev_line(1), f.curr_line()) {
f.write_curr(&mut w.0)?;
}
}
} else {
// General case: `tmp` holds the candidate line for the run and
// count.assign decides whether a newly read equal line updates it.
let mut tmp = f.curr_line().clone();
loop {
if f.getline()? {
count.write(&mut w.0, matches, tmp.line(), f.delim())?;
break;
}
if comp.equal_cols(f.prev_line(1), f.curr_line()) {
count.assign(&mut tmp, f.curr_line());
matches += 1;
} else {
count.write(&mut w.0, matches, tmp.line(), f.delim())?;
tmp.assign(f.curr_line());
matches = 1;
}
}
}
Ok(())
}
sourcepub fn write_curr(&self, w: &mut impl Write) -> Result<()>
pub fn write_curr(&self, w: &mut impl Write) -> Result<()>
write the current text line with newline
Examples found in repository
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
/// Entry point of the line-matching tool: for every input file, writes the
/// lines accepted by the `--pattern` matchers (or rejected by them, with
/// `--invert`) to the "-" writer, each prefixed by any `--location` columns.
///
/// `--show-const` and `--show-func` print their listing and return immediately.
/// A file's header is written only when `settings.checker.check` says so.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    // The description previously read "Select uniq lines.", copied from the
    // uniq tool; this program selects lines by pattern.
    let prog = args::ProgSpec::new("Select lines matching patterns.", args::FileCount::Many);
    const A: [ArgSpec; 6] = [
        arg! {"pattern", "p", "Col,Spec,Pattern", "Select line where this col matches this pattern."},
        arg! {"show-const", "", "", "Print available constants"},
        arg! {"show-func", "", "", "Print available functions"},
        arg! {"or", "o", "", "A line matches if any of the matchers matches."},
        arg! {"invert", "v", "", "Print lines that don't match."},
        arg! {"location", "l", "name:what", "prefix extra columns of location context."},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut list = LineMatcherList::new_with(Combiner::And);
    let mut reverse = false;
    let mut loc = FileLocList::new();
    for x in args {
        if x.name == "pattern" {
            list.push(&x.value)?;
        } else if x.name == "or" {
            list.multi = Combiner::Or;
        } else if x.name == "invert" {
            reverse = true;
        } else if x.name == "location" {
            loc.push(&x.value)?;
        } else if x.name == "show-const" {
            expr::show_const();
            return Ok(());
        } else if x.name == "show-func" {
            expr::show_func();
            return Ok(());
        } else {
            unreachable!();
        }
    }
    let mut w = get_writer("-")?;
    for x in &files {
        let mut f = Reader::new_open(x)?;
        if f.is_empty() {
            continue;
        }
        list.lookup(&f.names())?;
        // Output header = location columns followed by the file's own columns.
        let mut header = ColumnHeader::new();
        loc.add(&mut header)?;
        header.push_all(f.header())?;
        // Left empty when the file has no header line; the checker still sees it.
        let not_header = if f.has_header() {
            header.get_head(b'\t')
        } else {
            String::new()
        };
        if settings.checker.check(not_header.as_bytes(), x)? {
            w.write_all(not_header.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }
        loop {
            // XOR with `reverse` flips the selection for --invert.
            if list.ok(f.curr_line()) ^ reverse {
                // TODO: write previous lines of context if necessary
                loc.write_data(&mut w.0, b'\t', f.loc())?;
                f.write_curr(&mut w.0)?;
            }
            // TODO: on a non-selected line, write trailing context if necessary
            if f.getline()? {
                break;
            }
        }
    }
    Ok(())
}
More examples
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
/// Entry point of the uniq-style tool: collapses runs of adjacent lines that
/// compare equal under the `--key` comparison, optionally counting or
/// aggregating the merged lines, and writes the result to the "-" writer.
///
/// Returns early with `Ok(())` when the input is empty or has no data lines
/// (the header, if any, is still written in the latter case).
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
let prog = args::ProgSpec::new("Select uniq lines.", args::FileCount::One);
const A: [ArgSpec; 7] = [
arg! {"agg", "a", "Col,Spec", "Merge value from this column, in place."},
arg! {"agg-pre", "", "NewCol,SrcCol,Spec", "Merge value from SrcCol into new column, before other columns."},
arg! {"agg-post", "", "NewCol,SrcCol,Spec", "Merge value from SrcCol into new column, after other columns."},
arg! {"key", "k", "Spec", "How to compare adjacent lines"},
arg! {"count", "c", "ColName,Position", "Write the count of matching line."},
arg! {"which", "w", "(First,Last,Min,Max)[,LineCompare]", "Which of the matching lines should be printed."},
arg! {"agg-help", "", "", "Print help for aggregators"},
];
let (args, files) = args::parse(&prog, &A, argv, settings)?;
let mut agg = LineAggList::new();
let mut comp = LineCompList::new();
let mut count = Count::default();
// NOTE(review): "agg-help" is declared above but has no branch below; unless
// args::parse consumes it itself, passing it would hit unreachable!() -- confirm.
for x in args {
if x.name == "key" {
comp.add(&x.value)?;
} else if x.name == "count" {
count.get_count(&x.value)?;
} else if x.name == "which" {
count.get_which(&x.value)?;
} else if x.name == "agg" {
agg.push_replace(&x.value)?;
} else if x.name == "agg-post" {
agg.push_append(&x.value)?;
} else if x.name == "agg-pre" {
agg.push_prefix(&x.value)?;
} else {
unreachable!();
}
}
assert_eq!(files.len(), 1);
let mut f = Reader::new();
f.open(&files[0])?;
if f.is_empty() {
return Ok(());
}
comp.lookup(&f.names())?;
count.lookup(&f.names())?;
let mut c_write = Writer::new(f.delim());
if !agg.is_empty() {
// With aggregators active, the count column is expressed as one more
// "count" aggregator at the requested position.
if count.pos == CountPos::Begin {
agg.push_first_prefix(&format!("{},1,count", count.name))?;
}
if count.pos == CountPos::End {
agg.push_append(&format!("{},1,count", count.name))?;
}
agg.lookup(&f.names())?;
agg.fill(&mut c_write, f.header());
c_write.lookup(&f.names())?;
}
let mut w = get_writer("-")?;
if f.has_header() {
let mut ch = ColumnHeader::new();
if agg.is_empty() {
if count.pos == CountPos::Begin {
ch.push(&count.name)?;
}
ch.push_all(f.header())?;
if count.pos == CountPos::End {
ch.push(&count.name)?;
}
} else {
c_write.add_names(&mut ch, f.header())?;
}
w.write_all(ch.get_head(f.delim()).as_bytes())?;
}
if f.is_done() {
return Ok(());
}
f.do_split(comp.need_split());
// Number of lines in the current run of equal lines.
let mut matches = 1;
if !agg.is_empty() {
// Aggregating: `tmp` holds the line to emit for the current run.
agg.add(f.curr_line());
let mut tmp = f.curr_line().clone();
loop {
// getline() returns true at end of input: flush the last run and stop.
if f.getline()? {
c_write.write(&mut w.0, &tmp)?;
break;
}
if comp.equal_cols(f.prev_line(1), f.curr_line()) {
count.assign(&mut tmp, f.curr_line());
agg.add(f.curr_line());
} else {
c_write.write(&mut w.0, &tmp)?;
tmp.assign(f.curr_line());
agg.reset();
agg.add(f.curr_line());
}
}
} else if count.which == Which::Last {
// Keep the last line of each run: the previous line is written once the
// run is seen to have ended.
loop {
if f.getline()? {
count.write(&mut w.0, matches, f.prev_line(1).line(), f.delim())?;
break;
}
if comp.equal_cols(f.prev_line(1), f.curr_line()) {
matches += 1;
} else {
count.write(&mut w.0, matches, f.prev_line(1).line(), f.delim())?;
matches = 1;
}
}
} else if count.which == Which::First && count.is_plain() {
// Plain "first of each run": a line is written as soon as it differs from
// its predecessor, so nothing needs to be buffered.
f.write_curr(&mut w.0)?;
loop {
if f.getline()? {
break;
}
if !comp.equal_cols(f.prev_line(1), f.curr_line()) {
f.write_curr(&mut w.0)?;
}
}
} else {
// General case: `tmp` holds the candidate line for the run and
// count.assign decides whether a newly read equal line updates it.
let mut tmp = f.curr_line().clone();
loop {
if f.getline()? {
count.write(&mut w.0, matches, tmp.line(), f.delim())?;
break;
}
if comp.equal_cols(f.prev_line(1), f.curr_line()) {
count.assign(&mut tmp, f.curr_line());
matches += 1;
} else {
count.write(&mut w.0, matches, tmp.line(), f.delim())?;
tmp.assign(f.curr_line());
matches = 1;
}
}
}
Ok(())
}
sourcepub fn write_prev(&self, w: &mut impl Write, lookback: usize) -> Result<()>
pub fn write_prev(&self, w: &mut impl Write, lookback: usize) -> Result<()>
write previous text line with newline
sourcepub fn write_header(&self, w: &mut impl Write) -> Result<()>
pub fn write_header(&self, w: &mut impl Write) -> Result<()>
write header
Examples found in repository
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
/// Prepare and run a join over `config.infiles`.
///
/// Opens every input, sets up the optional "unmatched lines" outputs, builds
/// the key comparison and the output column list, writes the combined header
/// when the first input has one, then dispatches on the join type.
///
/// Fails when fewer than two inputs are given, when an unmatched-output entry
/// names a file number out of range or already used, when an output column
/// refers to a missing input, when no output columns result, or when the join
/// type is anything other than `JoinType::Quick`.
fn join(&mut self, config: &JoinConfig) -> Result<()> {
    let num_inputs = config.infiles.len();
    if num_inputs < 2 {
        return err!(
            "Join requires at least two input files, {} found",
            num_inputs
        );
    }
    for name in &config.infiles {
        self.r.push(Reader::new_open(name)?);
    }
    // One (initially absent) unmatched-lines writer per input file.
    for _ in 0..num_inputs {
        self.no_match.push(None)
    }
    for also in &config.unmatch_out {
        // File numbers are 1-based on the command line.
        if !(1..=num_inputs).contains(&also.file_num) {
            return err!(
                "Join had {} input files, but requested non matching lines from file {}",
                num_inputs,
                also.file_num
            );
        }
        let slot = also.file_num - 1;
        if self.no_match[slot].is_some() {
            return err!("Multiple uses of --also for file {}", also.file_num);
        }
        let mut out = get_writer(&also.file_name)?;
        self.r[slot].write_header(&mut *out)?;
        self.no_match[slot] = Some(out);
    }
    if config.keys.is_empty() {
        // No key given: compare with the spec "1".
        self.comp.push(CompMaker::make_line_comp("1")?);
    } else {
        for key in &config.keys {
            self.comp.push(CompMaker::make_line_comp(key)?);
        }
    }
    for (i, reader) in self.r.iter().enumerate() {
        self.comp.lookup_n(&reader.names(), i)?;
    }
    if config.col_specs.is_empty() {
        // Default output: every column of the first file, plus the columns of
        // the other files that are not used by the comparison.
        for (f, reader) in self.r.iter().enumerate() {
            let used = self.comp.used_cols(f);
            for col in 0..reader.names().len() {
                if f == 0 || !used.contains(&col) {
                    self.out_cols.push(OneOutCol::new_plain(f, col));
                }
            }
        }
    } else {
        for spec in &config.col_specs {
            let mut spec = spec.clone();
            if spec.file >= self.r.len() {
                return err!(
                    "{} input files, but file {} referred to as an output column",
                    self.r.len(),
                    spec.file
                );
            }
            spec.cols.lookup(&self.r[spec.file].names())?;
            for col in spec.cols.get_cols() {
                self.out_cols.push(OneOutCol::new(spec.file, col));
            }
        }
    }
    if self.out_cols.is_empty() {
        return err!("No output columns specified");
    }
    if self.r[0].has_header() {
        self.yes_match.write_all(b" CDX")?;
        for col in &self.out_cols {
            self.yes_match.write_all(&[config.out_delim])?;
            col.write_head(&mut *self.yes_match, &self.r)?;
        }
        self.yes_match.0.write_all(&[b'\n'])?;
    }
    if config.jtype != JoinType::Quick {
        return err!("Only quick supported");
    }
    self.join_quick(config)
}
sourcepub const fn header(&self) -> &StringLine
pub const fn header(&self) -> &StringLine
get the parsed header line
Examples found in repository
More examples
1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421
/// Open `name` for reading, keeping `lookback` previous lines available.
///
/// The line buffer holds `lookback + 1` entries (the current line plus the
/// lookback). The header is read immediately, and the location data is
/// initialised with the file name, line 1, and a byte count equal to the
/// header length when a header is present (0 otherwise).
pub fn new_open_with(name: &str, lookback: usize) -> Result<Self> {
    let mut reader = Self {
        file: get_reader(name)?,
        lines: vec![TextLine::new(); lookback + 1],
        cont: InfileContext::new(),
        do_split: true,
        curr: 0,
        loc: FileLocData::default(),
    };
    reader
        .cont
        .read_header(&mut *reader.file, &mut reader.lines[0])?;
    let header_bytes = if reader.has_header() {
        reader.header().line.len()
    } else {
        0
    };
    reader.loc.name = name.to_string();
    reader.loc.line = 1;
    reader.loc.bytes = header_bytes;
    Ok(reader)
}
/// get current line contents, without the trailing newline
///
/// The final byte of the stored line is dropped. An empty line yields an
/// empty slice instead of panicking on the length underflow.
pub fn curr_nl(&self) -> &[u8] {
    let line = self.curr_line();
    // saturating_sub keeps the end index at 0 for an empty buffer.
    let end = line.line.len().saturating_sub(1);
    &line.line[..end]
}
/// get previous line contents, without the trailing newline
///
/// The final byte of the stored line is dropped. Looking back past the start
/// of the file gives an empty line, which yields an empty slice instead of
/// panicking on the length underflow.
pub fn prev_nl(&self, n: usize) -> &[u8] {
    let line = self.prev_line(n);
    // saturating_sub keeps the end index at 0 for an empty buffer.
    let end = line.line.len().saturating_sub(1);
    &line.line[..end]
}
/// get the column delimiter byte recorded in the input context
pub const fn delim(&self) -> u8 {
self.cont.delim
}
/// get column names, as a freshly built vector of slices into the header
pub fn names(&self) -> Vec<&str> {
self.cont.header.vec()
}
/// write the current text line, exactly as stored, to `w`
///
/// Performs the same write as `write_curr`.
pub fn write(&self, w: &mut impl Write) -> Result<()> {
    let bytes = &self.curr_line().line;
    w.write_all(bytes)?;
    Ok(())
}
/// open file for reading, replacing the current input, and read its header
///
/// NOTE(review): unlike `new_open_with`, this does not set `loc.name`,
/// `loc.line` or `loc.bytes` -- confirm whether that is intended.
pub fn open(&mut self, name: &str) -> Result<()> {
self.file = get_reader(name)?;
self.cont.read_header(&mut *self.file, &mut self.lines[0])
}
/// The full text of the header, as stored in the input context.
///
/// NOTE(review): previously documented as "without the trailing newline", but
/// other code writes `header().line` directly in front of data lines, which
/// suggests the newline is included -- confirm.
pub const fn header_line(&self) -> &String {
&self.cont.header.line
}
/// was file zero bytes? (reports the input context's `is_empty` flag)
pub const fn is_empty(&self) -> bool {
self.cont.is_empty
}
/// have we hit EOF? Set by `getline` once a read reports end of input.
pub const fn is_done(&self) -> bool {
self.cont.is_done
}
/// line number of curr_line, as tracked in the location data
/// (incremented by every `getline`)
pub const fn line_number(&self) -> usize {
self.loc.line
}
/// advance to the next slot of the line ring buffer, wrapping to slot 0
/// after the last one, and bump the line number
fn incr(&mut self) {
    self.loc.line += 1;
    let next = self.curr + 1;
    self.curr = if next < self.lines.len() { next } else { 0 };
}
/// get next line of text
///
/// Adds the length of the line being left to the byte offset, advances to
/// the next buffer slot and reads into it. Returns `true` once end of input
/// has been reached; otherwise the new line is split into columns when
/// splitting is enabled.
pub fn getline(&mut self) -> Result<bool> {
    let consumed = self.curr().line.len();
    self.loc.bytes += consumed;
    self.incr();
    let at_eof = self.lines[self.curr].read(&mut *self.file)?;
    if at_eof {
        self.cont.is_done = true;
    } else if self.do_split {
        let delim = self.cont.delim;
        self.lines[self.curr].split(delim);
    }
    Ok(self.cont.is_done)
}
/// get current line of text (the buffer slot most recently read into)
pub fn curr_line(&self) -> &TextLine {
&self.lines[self.curr]
}
/// get mutable access to the current line of text
pub fn curr_mut(&mut self) -> &mut TextLine {
&mut self.lines[self.curr]
}
/// get current line of text; returns the same line as `curr_line`
pub fn curr(&self) -> &TextLine {
&self.lines[self.curr]
}
/// get a previous line of text
/// looking back from the start of the file shows empty lines.
///
/// `lookback` counts slots backwards from the current one in the ring
/// buffer, wrapping around its start.
pub fn prev_line(&self, lookback: usize) -> &TextLine {
    let slot = if lookback <= self.curr {
        self.curr - lookback
    } else {
        // wrap around the start of the ring buffer
        self.curr + self.lines.len() - lookback
    };
    &self.lines[slot]
}
/// write the current text line, exactly as stored, to `w`
pub fn write_curr(&self, w: &mut impl Write) -> Result<()> {
    let bytes = &self.curr_line().line;
    w.write_all(bytes)?;
    Ok(())
}
/// write the text line `lookback` lines back, exactly as stored, to `w`
pub fn write_prev(&self, w: &mut impl Write, lookback: usize) -> Result<()> {
    let bytes = &self.prev_line(lookback).line;
    w.write_all(bytes)?;
    Ok(())
}
/// write the stored header text to `w`
pub fn write_header(&self, w: &mut impl Write) -> Result<()> {
    let text = self.cont.header.line.as_bytes();
    w.write_all(text)?;
    Ok(())
}
/// get the parsed header line held by the input context
pub const fn header(&self) -> &StringLine {
&self.cont.header
}
/// did the input have a header? (reports the input context's `has_header` flag)
pub const fn has_header(&self) -> bool {
self.cont.has_header
}
}
/// print a bunch of u8 to stderr, adding a newline
///
/// Panics if writing to stderr fails.
pub fn prerr(data: &[&[u8]]) {
    let stderr = std::io::stderr();
    let mut out = stderr.lock();
    for chunk in data {
        out.write_all(chunk).unwrap();
    }
    out.write_all(b"\n").unwrap();
}
/// print a bunch of u8 to stderr, without adding a newline
///
/// Panics if writing to stderr fails.
pub fn prerr_n(data: &[&[u8]]) {
    let stderr = std::io::stderr();
    let mut out = stderr.lock();
    for chunk in data {
        out.write_all(chunk).unwrap();
    }
}
/*
fn bytes_equal(c1: u8, c2: u8, ic: bool) -> bool {
if c1 == c2 {
return true;
}
if c1 == b'?' {
return true;
}
if !ic {
return false;
}
c1.to_ascii_lowercase() == c2.to_ascii_lowercase()
}
fn chars_equal(c1: char, c2: char, ic: bool) -> bool {
if c1 == c2 {
return true;
}
if c1 == '?' {
return true;
}
if !ic {
return false;
}
c1.to_lowercase().eq(c2.to_lowercase())
}
*/
/*
/// match in glob format
pub fn bglob(mut wild: &[u8], mut buff: &[u8], ic: bool) -> bool {
while !buff.is_empty() && !wild.is_empty() && (wild[0] != b'*') {
if !bytes_equal(wild[0], buff[0], ic) {
return false;
}
wild = &wild[1..];
buff = &buff[1..];
}
if wild.is_empty() && !buff.is_empty() {
return false;
}
let mut cp: &[u8] = &[];
let mut mp: &[u8] = &[];
while !buff.is_empty() {
if !wild.is_empty() && (wild[0] == b'*') {
wild = &wild[1..];
if wild.is_empty() {
return true;
}
mp = wild;
cp = &buff[1..];
} else if !wild.is_empty() && bytes_equal(wild[0], buff[0], ic) {
wild = &wild[1..];
buff = &buff[1..];
} else {
wild = mp;
cp = &cp[1..];
buff = cp;
}
}
while !wild.is_empty() && (wild[0] == b'*') {
wild = &wild[1..];
}
wild.is_empty()
}
fn first(s: &str) -> char {
debug_assert!(!s.is_empty());
s.chars().next().unwrap()
}
*/
//fn first_len(s : &str) -> usize {
// s.chars().next().unwrap().len_utf8()
//}
/*
fn skip_first(s: &str) -> &str {
debug_assert!(!s.is_empty());
&s[s.chars().next().unwrap().len_utf8()..]
}
fn take_first(s: &mut &str) -> char {
debug_assert!(!s.is_empty());
let x = s.chars().next().unwrap();
*s = &s[x.len_utf8()..];
x
}
/// match in glob format
pub fn sglob(mut wild: &str, mut buff: &str, ic: bool) -> bool {
while !buff.is_empty() && !wild.is_empty() && (first(wild) != '*') {
if !chars_equal(first(wild), first(buff), ic) {
return false;
}
wild = skip_first(wild);
buff = skip_first(buff);
}
if wild.is_empty() && !buff.is_empty() {
return false;
}
let mut cp: &str = "";
let mut mp: &str = "";
while !buff.is_empty() {
if !wild.is_empty() && (first(wild) == '*') {
wild = &wild[1..];
if wild.is_empty() {
return true;
}
mp = wild;
cp = skip_first(buff);
} else if !wild.is_empty() && chars_equal(first(wild), first(buff), ic) {
wild = skip_first(wild);
buff = skip_first(buff);
} else {
wild = mp;
cp = skip_first(cp);
buff = cp;
}
}
while !wild.is_empty() && (first(wild) == '*') {
wild = skip_first(wild);
}
wild.is_empty()
}
*/
/// How to combine headers from multiple sources
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum HeaderMode {
/// First can be anything, others must match
Match,
/// First must be CDX, others must match
Require,
/// Any CDX are removed
Strip,
/// No CDX allowed
None,
/// First can be anything, others blindly accepted
Trust,
/// A CDX line, if present, is ignored (not treated as a header)
Ignore,
}
/// strings associated with HeaderMode, in the order the variants are declared
pub const HEADER_MODE: [&str; 6] = ["Match", "Require", "Strip", "None", "Trust", "Ignore"];
impl FromStr for HeaderMode {
    type Err = Error;
    /// Parse a mode name, ignoring ASCII case.
    ///
    /// Accepts "match", "require", "strip", "none", "trust" and "ignore";
    /// anything else is an error naming the accepted values.
    fn from_str(spec: &str) -> Result<Self> {
        // Lowercase once instead of once per comparison.
        match spec.to_ascii_lowercase().as_str() {
            "match" => Ok(Self::Match),
            "require" => Ok(Self::Require),
            "strip" => Ok(Self::Strip),
            "none" => Ok(Self::None),
            "trust" => Ok(Self::Trust),
            "ignore" => Ok(Self::Ignore),
            // The message now also lists Ignore, which is accepted above.
            _ => err!(
                "Input Header Mode must be one of Match, Require, Strip, None, Trust or Ignore : {}",
                spec
            ),
        }
    }
}
impl Default for HeaderMode {
/// The default mode is `Match`.
fn default() -> Self {
Self::Match
}
}
/// Object to enforce HeaderMode across a sequence of input files
#[derive(Debug, Default, Clone)]
pub struct HeaderChecker {
/// the mode to enforce
pub mode: HeaderMode,
/// the first header seen
head: Vec<u8>,
/// true after first header has been processed
saw_one: bool,
}
/// Is this line a CDX header?
///
/// Only the four-byte prefix `" CDX"` (leading space included) is checked.
pub fn is_cdx(data: &[u8]) -> bool {
    const MAGIC: &[u8] = b" CDX";
    // check for more validity?
    data.len() >= MAGIC.len() && &data[..MAGIC.len()] == MAGIC
}
/// Decide whether `data_in` is a CDX header line under `mode`.
///
/// Returns `Ok(false)` in `Ignore` mode or when the line does not start with
/// `" CDX"`. In `Strip` and `None` modes any such line is accepted without
/// further checks. Otherwise the rest of the line is validated: byte 4 is the
/// column delimiter (not a newline, not alphanumeric, ASCII only) and each
/// column name must be non-empty, start with an alphabetic character and
/// contain only alphanumerics and underscores. `fname` is used in error
/// messages only.
fn is_valid_cdx(data_in: &[u8], mode: HeaderMode, fname: &str) -> Result<bool> {
    if mode == HeaderMode::Ignore || !data_in.starts_with(b" CDX") {
        return Ok(false);
    }
    if matches!(mode, HeaderMode::Strip | HeaderMode::None) {
        return Ok(true);
    }
    // Drop a single trailing newline, if present.
    let body = match data_in.split_last() {
        Some((&b'\n', rest)) => rest,
        _ => data_in,
    };
    if body.len() < 6 {
        return err!("File {} has an oddly truncated header line", fname);
    }
    let delim = body[4];
    if delim == b'\n' || delim.is_ascii_alphanumeric() || !delim.is_ascii() {
        return err!("Header for file {} has an invalid column delimiter", fname);
    }
    let names = str::from_utf8(&body[5..])?;
    let delim = char::from(delim);
    for name in names.split(delim) {
        if name.is_empty() {
            return err!("File {} has an empty column name", fname);
        }
        if !name.first().is_alphabetic() {
            return err!("Header for file {} has column name {} which does not start with an alphabetic character.", fname, name);
        }
        if name.chars().any(|ch| !ch.is_alphanumeric() && ch != '_') {
            return err!("Header for file {} has column name {} which contains something other than alphnumeric and underscore.", fname, name);
        }
    }
    Ok(true)
}
impl HeaderChecker {
/// Create a checker in its default state (same as `Self::default()`).
pub fn new() -> Self {
Self::default()
}
/// Create a checker that enforces `mode`; all other fields take their defaults.
pub fn from_mode(mode: HeaderMode) -> Self {
Self {
mode,
..Self::default()
}
}
/// call for the first line of every input file
/// return true if the header should be written
///
/// The header text (or the placeholder `b"fake"` when the file has no
/// header) is passed to `check`; the result is true only for a file that has
/// a header and is the first one this checker has seen.
pub fn check_file(&mut self, file: &Reader, fname: &str) -> Result<bool> {
    // Sample this before `check` runs.
    let first = !self.saw_one;
    let has_header = file.has_header();
    let line: &[u8] = if has_header {
        file.header().line.as_bytes()
    } else {
        b"fake"
    };
    self.check(line, fname)?;
    Ok(has_header && first)
}
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
/// Write the first `max_lines` lines of `file` transposed to the "-" writer:
/// one output line per input column, holding the column name (when the file
/// has a header) followed by that column's value from each buffered line,
/// tab separated. When `head` is set, `" CDX\t"` is written first.
/// An empty file produces no output.
pub fn transpose(file: &str, head: bool, max_lines: usize) -> Result<()> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(());
    }
    // Buffer up to `max_lines` lines, stopping early at end of input.
    let mut rows = Vec::new();
    while rows.len() < max_lines {
        rows.push(f.curr().clone());
        if f.getline()? {
            break;
        }
    }
    let mut w = get_writer("-")?;
    if head {
        w.write_all(b" CDX\t")?;
    }
    let has_header = f.has_header();
    for col in 0..f.header().len() {
        if has_header {
            w.write_all(f.header()[col].as_bytes())?;
        }
        // A separator is needed before every value except the first item on
        // the output line.
        let mut need_tab = has_header;
        for row in &rows {
            if need_tab {
                w.write_all(b"\t")?;
            }
            need_tab = true;
            w.write_all(&row[col])?;
        }
        w.write_all(b"\n")?;
    }
    Ok(())
}
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
/// merge all the files into w, using a binary heap of file indices
///
/// The heap orders indices through `MergeContext::compare`, which is reached
/// via a shared `RefCell`; every explicit borrow below is released before the
/// heap is pushed to or popped from, since the comparator borrows mutably.
/// With `unique`, a line is written only if it is not equal to the last line
/// written. A single input without `unique` is copied through unchanged.
/// `_tmp` is unused.
pub fn merge_t2(
&self,
in_files: &[String],
cmp: &mut LineCompList,
mut w: impl Write,
unique: bool,
_tmp: &TempDir,
) -> Result<()> {
if in_files.is_empty() {
return Ok(());
}
if in_files.len() == 1 && !unique {
let r = get_reader(&in_files[0])?;
return copy(r.0, w);
}
let mc = Rc::new(RefCell::new(MergeContext{open : Vec::with_capacity(in_files.len()), cmp}));
let mut heap = BinaryHeap::new_by(|a: &usize, b: &usize| mc.borrow_mut().compare(*a, *b));
{
// Scoped so the mutable borrow ends before the heap is first used.
let mut mcm = mc.borrow_mut();
for x in in_files {
mcm.open.push(Reader::new_open(x)?);
}
if !mcm.cmp.need_split() {
for x in &mut mcm.open {
x.do_split(false);
}
}
// FIXME -- Check Header
if mcm.open[0].has_header() {
w.write_all(mcm.open[0].header().line.as_bytes())?;
}
}
// Seed the heap with every file that still has a line to offer.
for i in 0..in_files.len() {
if !mc.borrow().open[i].is_done() {
heap.push(i)
}
}
if unique {
if heap.is_empty() {
return Ok(());
}
let first = heap.pop().unwrap();
let mut prev = mc.borrow().open[first].curr_line().clone();
// getline() returns true at end of input; only re-queue live files.
if !mc.borrow_mut().open[first].getline()? {
heap.push(first);
}
w.write_all(prev.line())?;
while !heap.is_empty() {
if let Some(x) = heap.pop() {
let eq = mc.borrow_mut().equal(&prev, x);
if !eq {
let mcm = mc.borrow();
w.write_all(mcm.open[x].curr_line().line())?;
prev.assign(mcm.open[x].curr_line());
}
if !mc.borrow_mut().open[x].getline()? {
heap.push(x);
}
}
}
} else {
while !heap.is_empty() {
if let Some(x) = heap.pop() {
w.write_all(mc.borrow_mut().open[x].curr_line().line())?;
if !mc.borrow_mut().open[x].getline()? {
heap.push(x);
}
}
}
}
Ok(())
}
/// merge all the files into w, using a merge tree
///
/// A single input without `unique` is copied through unchanged. Otherwise
/// every input is opened, the first file's header (if any) is written, and
/// lines are emitted in the order `MergeTreeItem::next` yields their file
/// indices. With `unique`, a line is written only when it differs from the
/// line before it under `cmp`. `_tmp` is unused.
pub fn merge_t1 (
    &self,
    in_files: &[String],
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
    _tmp: &TempDir,
) -> Result<()> {
    if in_files.is_empty() {
        return Ok(());
    }
    if !unique && in_files.len() == 1 {
        let r = get_reader(&in_files[0])?;
        return copy(r.0, w);
    }
    let mut open_files: Vec<Reader> = Vec::with_capacity(in_files.len());
    for name in in_files {
        open_files.push(Reader::new_open(name)?);
    }
    if !cmp.need_split() {
        for reader in &mut open_files {
            reader.do_split(false);
        }
    }
    // FIXME -- Check Header
    if open_files[0].has_header() {
        w.write_all(open_files[0].header().line.as_bytes())?;
    }
    let nums: Vec<usize> = (0..open_files.len()).collect();
    let mut mm = MergeTreeItem::new_tree(&open_files, &nums);
    if unique {
        let first = match mm.next(cmp, &mut open_files)? {
            Some(i) => i,
            None => return Ok(()),
        };
        w.write_all(open_files[first].curr_line().line())?;
        let mut prev = open_files[first].curr_line().clone();
        while let Some(i) = mm.next(cmp, &mut open_files)? {
            let line = open_files[i].curr_line();
            if !cmp.equal_cols(&prev, line) {
                w.write_all(line.line())?;
            }
            prev.assign(line);
        }
    } else {
        while let Some(i) = mm.next(cmp, &mut open_files)? {
            w.write_all(open_files[i].curr_line().line())?;
        }
    }
    Ok(())
}
/// merge all the files into w
///
/// Creates a temporary directory named with the prefix "merge" and delegates
/// to `merge_t1` when `alt_merge` is set, otherwise to `merge_t2`.
pub fn merge(&self, files: &[String], cmp: &mut LineCompList, w: impl Write, unique: bool) -> Result<()> {
let tmp = TempDir::new("merge")?;
if self.alt_merge {
self.merge_t1(files, cmp, w, unique, &tmp)
} else {
self.merge_t2(files, cmp, w, unique, &tmp)
}
}
/// given two file names, merge them into output
///
/// Both inputs are read unsplit and compared as whole lines with `cmp`,
/// whose columns are looked up against the left file's names. The left
/// file's header, if any, is written first.
///
/// Without `unique`, lines that compare equal are both written (right first).
/// With `unique`, each written line is remembered in `prev` and every
/// following line in either file that compares equal to it is skipped,
/// including in the tail left over once one file is exhausted.
pub fn merge_2(
    &self,
    left: &str,
    right: &str,
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
) -> Result<()> {
    let mut left_file = Reader::new();
    let mut right_file = Reader::new();
    left_file.open(left)?;
    right_file.open(right)?;
    left_file.do_split(false);
    right_file.do_split(false);
    cmp.lookup(&left_file.names())?;
    // FIXME -- Check Header
    if left_file.has_header() {
        w.write_all(left_file.header().line.as_bytes())?;
    }
    if unique {
        // Bytes of the most recently written line. They are swapped out of the
        // reader's buffer, which is about to be overwritten by getline().
        let mut prev: Vec<u8> = Vec::new();
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            if ord == Ordering::Less {
                left_file.write(&mut w)?;
                mem::swap(&mut prev, left_file.curr_mut().raw());
                left_file.getline()?;
            } else if ord == Ordering::Greater {
                right_file.write(&mut w)?;
                // Must take the RIGHT line: it is the one just written.
                // Taking the left one clobbered the left reader's current line.
                mem::swap(&mut prev, right_file.curr_mut().raw());
                right_file.getline()?;
            } else {
                left_file.write(&mut w)?;
                mem::swap(&mut prev, left_file.curr_mut().raw());
                left_file.getline()?;
                right_file.getline()?;
            }
            while !left_file.is_done() && cmp.equal_lines(left_file.curr().line(), &prev) {
                left_file.getline()?;
            }
            while !right_file.is_done() && cmp.equal_lines(right_file.curr().line(), &prev) {
                right_file.getline()?;
            }
        }
        // Drain whichever file still has lines, still dropping duplicates.
        // The current line is known to differ from `prev` here (or nothing
        // has been written yet), so it is always written.
        while !left_file.is_done() {
            left_file.write(&mut w)?;
            mem::swap(&mut prev, left_file.curr_mut().raw());
            left_file.getline()?;
            while !left_file.is_done() && cmp.equal_lines(left_file.curr().line(), &prev) {
                left_file.getline()?;
            }
        }
        while !right_file.is_done() {
            right_file.write(&mut w)?;
            mem::swap(&mut prev, right_file.curr_mut().raw());
            right_file.getline()?;
            while !right_file.is_done() && cmp.equal_lines(right_file.curr().line(), &prev) {
                right_file.getline()?;
            }
        }
    } else {
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            // if Equal, write both lines
            if ord != Ordering::Less {
                right_file.write(&mut w)?;
                right_file.getline()?;
            }
            if ord != Ordering::Greater {
                left_file.write(&mut w)?;
                left_file.getline()?;
            }
        }
    }
    // Non-unique tail: copy whatever remains (no-ops after the unique path).
    while !left_file.is_done() {
        left_file.write(&mut w)?;
        left_file.getline()?;
    }
    while !right_file.is_done() {
        right_file.write(&mut w)?;
        right_file.getline()?;
    }
    Ok(())
}
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
/// Print the top of `file` as an aligned table that fits `screen`.
///
/// Reads at most `screen.height` lines (the header counts as one), sizes each
/// column to its widest cell, then shrinks the widest columns with `dec_max`
/// until the cells plus one separator per column fit in `screen.width`.
/// Header cells are centred; data cells are left-aligned and padded.
/// Output goes to the "-" writer. An empty file prints nothing.
///
/// Rows with more cells than the header get extra columns instead of
/// panicking. A screen narrower than the column count shrinks every column
/// to zero width instead of underflowing.
pub fn show(file: &str, screen: &Rect) -> Result<()> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(());
    }
    let mut lines: Vec<StringLine> = Vec::new();
    let mut sizes: Vec<usize> = vec![0; f.header().len()];
    if f.has_header() {
        lines.push(f.header().clone());
    }
    if !f.is_done() {
        while lines.len() < screen.height {
            let mut s = StringLine::new();
            s.line = String::from_utf8_lossy(f.curr().line()).to_string();
            s.split(f.delim());
            lines.push(s);
            if f.getline()? {
                break;
            }
        }
    }
    for x in &lines {
        for (i, c) in x.iter().enumerate() {
            let width = UnicodeWidthStr::width(c);
            // A ragged row may have more cells than the header.
            if i >= sizes.len() {
                sizes.resize(i + 1, 0);
            }
            if sizes[i] < width {
                sizes[i] = width;
            }
        }
    }
    let mut total: usize = sizes.iter().sum();
    // One separator per column; saturate so a very narrow screen cannot underflow.
    let target = screen.width.saturating_sub(sizes.len());
    while total > target {
        dec_max(&mut sizes);
        total = sizes.iter().sum();
    }
    let mut w = get_writer("-")?;
    // Only the first displayed line (the header, when present) is centred.
    let mut do_center = f.has_header();
    for x in &lines {
        let mut need_space = false;
        for (c, y) in x.iter().enumerate() {
            let (nstr, width) = y.unicode_truncate(sizes[c]);
            if need_space {
                w.write_all(b" ")?;
            }
            let num = (sizes[c] - width) / 2;
            if do_center {
                for _ in 0..num {
                    w.write_all(b" ")?;
                }
            }
            w.write_all(nstr.as_bytes())?;
            if do_center {
                let num2 = (sizes[c] - width) - num;
                for _ in 0..num2 {
                    w.write_all(b" ")?;
                }
            } else {
                for _ in width..sizes[c] {
                    w.write_all(b" ")?;
                }
            }
            need_space = true;
        }
        do_center = false;
        w.write_all(b"\n")?;
    }
    Ok(())
}
/// show the file in a specific rectangle
///
/// Works like `show`, but the formatted rows replace the contents of `w`
/// (one `String` per row, no trailing newline) instead of being printed.
/// Returns the sum of the final column widths plus one per column; an empty
/// file returns 0 and leaves `w` untouched.
///
/// Rows with more columns than the header are tolerated: the width table
/// grows to cover them instead of panicking on an out-of-range index.
///
/// # Errors
/// Propagates failures from opening or reading `file`.
pub fn show2(file: &str, screen: &Rect, w: &mut Vec<String>) -> Result<usize> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(0);
    }
    let mut lines: Vec<StringLine> = Vec::new();
    let mut sizes: Vec<usize> = vec![0; f.header().len()];
    if f.has_header() {
        lines.push(f.header().clone());
    }
    if !f.is_done() {
        while lines.len() < screen.height {
            let mut s = StringLine::new();
            s.line = String::from_utf8_lossy(f.curr().line()).to_string();
            s.split(f.delim());
            lines.push(s);
            if f.getline()? {
                break;
            }
        }
    }
    // Widest display width seen in each column.
    for x in &lines {
        for (i, c) in x.iter().enumerate() {
            let width = UnicodeWidthStr::width(c);
            if i >= sizes.len() {
                // ragged row: more cells than the header declared
                sizes.resize(i + 1, 0);
            }
            if sizes[i] < width {
                sizes[i] = width;
            }
        }
    }
    // One cell of every column is reserved for the separator; a screen
    // narrower than the column count simply leaves no room for content.
    let target = screen.width.saturating_sub(sizes.len());
    let mut total: usize = sizes.iter().sum();
    while total > target {
        dec_max(&mut sizes);
        let shrunk: usize = sizes.iter().sum();
        if shrunk >= total {
            // dec_max made no progress; stop rather than loop forever
            break;
        }
        total = shrunk;
    }
    // Only the first row is centred, and only when it is a header.
    let mut do_center = f.has_header();
    w.clear();
    for x in &lines {
        let mut s = String::new();
        let mut need_space = false;
        for (c, y) in x.iter().enumerate() {
            let (nstr, width) = y.unicode_truncate(sizes[c]);
            if need_space {
                s.push(' ');
            }
            let num = (sizes[c] - width) / 2;
            if do_center {
                for _ in 0..num {
                    s.push(' ');
                }
            }
            s.push_str(nstr);
            if do_center {
                // whatever padding is left after the leading half
                let num2 = (sizes[c] - width) - num;
                for _ in 0..num2 {
                    s.push(' ');
                }
            } else {
                for _ in width..sizes[c] {
                    s.push(' ');
                }
            }
            need_space = true;
        }
        do_center = false;
        w.push(s);
    }
    Ok(sizes.iter().sum::<usize>() + sizes.len())
}
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
/// Entry point of the column-selection tool.
///
/// Every `--fields`, `--group`, `--expr` and `--composite` argument appends
/// one output column source to the writer, in command-line order; `--dups`
/// sets how duplicate column names are handled. For each non-empty input
/// file the selected columns of every line are written to stdout. The
/// header is emitted only when `settings.checker.check` returns true.
///
/// # Errors
/// Fails when no column source was given, or when argument parsing, column
/// lookup, reading or writing fails.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Select columns", args::FileCount::Many);
    const A: [ArgSpec; 5] = [
        arg! {"fields", "f", "Columns", "the columns to select."},
        arg! {"group", "g", "Columns", "the columns in a bunch, e.g. '.group:1-3'"},
        arg! {"expr", "e", "Name:Expr", "The result of an arithmetic expression"},
        arg! {"composite", "c", "Spec", "new value made from parts. e.g. 'stuff:abc^{two}def'"},
        arg_enum! {"dups", "D", "Mode", "Duplicate Column Handling", &["Fail", "Allow", "Numeric"]},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut header = ColumnHeader::new();
    let mut v = Writer::new(b'\t');
    for x in args {
        if x.name == "dups" {
            header.set_handling(DupColHandling::new(&x.value)?);
        } else if x.name == "fields" {
            v.push(Box::new(ReaderColumns::new(ColumnSet::from_spec(
                &x.value,
            )?)));
        } else if x.name == "group" {
            v.push(Box::new(ColumnClump::from_spec(&x.value)?));
        } else if x.name == "expr" {
            v.push(Box::new(ColumnExpr::new(&x.value)?));
        } else if x.name == "composite" {
            v.push(Box::new(CompositeColumn::new(&x.value)?));
        } else {
            unreachable!();
        }
    }
    if v.is_empty() {
        // name the options this tool actually accepts
        bail!("cut requires at least one of --fields, --group, --expr or --composite");
    }
    let mut w = get_writer("-")?;
    let mut not_header = String::new();
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        v.lookup(&f.names())?;
        header.clear();
        not_header.clear();
        v.add_names(&mut header, f.header())?;
        if f.has_header() {
            not_header = header.get_head(b'\t');
        }
        if settings.checker.check(not_header.as_bytes(), x)? {
            w.write_all(not_header.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }
        // getline() returns true once the end of the file is reached
        loop {
            v.write(&mut w.0, f.curr())?;
            if f.getline()? {
                break;
            }
        }
    }
    Ok(())
}
sourcepub const fn has_header(&self) -> bool
pub const fn has_header(&self) -> bool
Returns whether the input has a header line
Examples found in repository
1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421
/// Open `name` for reading, keeping `lookback` previous lines available.
///
/// The line buffer holds `lookback + 1` slots. The header is read straight
/// away, and the location data starts at line 1 with a byte offset equal to
/// the header length (0 when there is no header).
pub fn new_open_with(name: &str, lookback: usize) -> Result<Self> {
    let lines = vec![TextLine::new(); lookback + 1];
    let mut reader = Self {
        file: get_reader(name)?,
        lines,
        cont: InfileContext::new(),
        do_split: true,
        curr: 0,
        loc: FileLocData::default(),
    };
    reader
        .cont
        .read_header(&mut *reader.file, &mut reader.lines[0])?;
    let header_bytes = if reader.has_header() {
        reader.header().line.len()
    } else {
        0
    };
    reader.loc.name = name.to_string();
    reader.loc.line = 1;
    reader.loc.bytes = header_bytes;
    Ok(reader)
}
/// get current line contents, without the trailing newline
///
/// Only a trailing `\n` is removed. A line that does not end in a newline
/// (for example an empty buffer slot) is returned unchanged, rather than
/// panicking on an empty line or dropping a real data byte.
pub fn curr_nl(&self) -> &[u8] {
    let line: &[u8] = &self.curr_line().line;
    line.strip_suffix(b"\n").unwrap_or(line)
}
/// get previous line contents, without the trailing newline
///
/// `n` is the lookback distance passed to `prev_line`. Only a trailing `\n`
/// is removed. Looking back past the start of the file yields an empty
/// line, which is returned as an empty slice instead of panicking.
pub fn prev_nl(&self, n: usize) -> &[u8] {
    let line: &[u8] = &self.prev_line(n).line;
    line.strip_suffix(b"\n").unwrap_or(line)
}
/// get the column delimiter byte stored in the input context
pub const fn delim(&self) -> u8 {
self.cont.delim
}
/// get column names, as a vector of string slices borrowed from the header
pub fn names(&self) -> Vec<&str> {
self.cont.header.vec()
}
/// write the current text line with newline
///
/// Same output as `write_curr`: the raw bytes of the current line.
pub fn write(&self, w: &mut impl Write) -> Result<()> {
    self.write_curr(w)
}
/// open file for reading
///
/// Replaces the underlying input with `name` and reads its header into the
/// first line slot. The location data is initialised the same way
/// `new_open_with` does it (file name, line 1, byte offset just past the
/// header), and the current slot is reset to 0 so `curr()` refers to the
/// slot that `read_header` just filled, even when the reader is reused.
///
/// # Errors
/// Fails if the file cannot be opened or the header cannot be read.
pub fn open(&mut self, name: &str) -> Result<()> {
    self.file = get_reader(name)?;
    self.curr = 0;
    self.cont.read_header(&mut *self.file, &mut self.lines[0])?;
    self.loc.name = name.to_string();
    self.loc.line = 1;
    self.loc.bytes = if self.has_header() {
        self.header().line.len()
    } else {
        0
    };
    Ok(())
}
/// The full text of the header, without the trailing newline
// NOTE(review): other code writes `header().line` verbatim as the complete
// header line, so confirm whether the newline is really absent here.
pub const fn header_line(&self) -> &String {
&self.cont.header.line
}
/// was file zero bytes? (reports the `is_empty` flag of the input context)
pub const fn is_empty(&self) -> bool {
self.cont.is_empty
}
/// have we hit EOF? (`getline` sets this flag when a read reports end of input)
pub const fn is_done(&self) -> bool {
self.cont.is_done
}
/// line number of curr_line, as tracked in the location data
/// (incremented once per `getline`)
pub const fn line_number(&self) -> usize {
self.loc.line
}
/// Advance to the next line slot, wrapping to slot 0 at the end of the
/// buffer, and bump the line counter.
fn incr(&mut self) {
    self.loc.line += 1;
    let next = self.curr + 1;
    self.curr = if next < self.lines.len() { next } else { 0 };
}
/// get next line of text
///
/// Adds the length of the current line to the byte offset, moves to the
/// next slot and reads into it. The line is split into columns unless
/// splitting is disabled. Returns `true` once the end of input is reached.
pub fn getline(&mut self) -> Result<bool> {
    let consumed = self.curr().line.len();
    self.loc.bytes += consumed;
    self.incr();
    let hit_eof = self.lines[self.curr].read(&mut *self.file)?;
    if hit_eof {
        self.cont.is_done = true;
    } else if self.do_split {
        let delim = self.cont.delim;
        self.lines[self.curr].split(delim);
    }
    Ok(self.cont.is_done)
}
/// get current line of text (the slot indexed by `curr`)
pub fn curr_line(&self) -> &TextLine {
&self.lines[self.curr]
}
/// get current line of text, as a mutable reference
pub fn curr_mut(&mut self) -> &mut TextLine {
&mut self.lines[self.curr]
}
/// get current line of text; returns the same slot as `curr_line`
pub fn curr(&self) -> &TextLine {
&self.lines[self.curr]
}
/// get a previous line of text
/// looking back from the start of the file shows empty lines.
///
/// `lookback` counts slots backwards from the current one, wrapping around
/// the line buffer.
pub fn prev_line(&self, lookback: usize) -> &TextLine {
    let slot = if lookback <= self.curr {
        self.curr - lookback
    } else {
        // wrap around to the tail of the buffer
        self.curr + self.lines.len() - lookback
    };
    &self.lines[slot]
}
/// write the current text line with newline
/// (the raw bytes of the current line are written unchanged)
pub fn write_curr(&self, w: &mut impl Write) -> Result<()> {
w.write_all(&self.curr_line().line)?;
Ok(())
}
/// write previous text line with newline
/// `lookback` selects the line exactly as in `prev_line`
pub fn write_prev(&self, w: &mut impl Write, lookback: usize) -> Result<()> {
w.write_all(&self.prev_line(lookback).line)?;
Ok(())
}
/// write header: the stored header text is written to `w` unchanged
pub fn write_header(&self, w: &mut impl Write) -> Result<()> {
w.write_all(self.cont.header.line.as_bytes())?;
Ok(())
}
/// get the header as a `StringLine` (nothing is written)
pub const fn header(&self) -> &StringLine {
&self.cont.header
}
/// does the input have a header? (reports the `has_header` flag of the input context)
pub const fn has_header(&self) -> bool {
self.cont.has_header
}
}
/// print a bunch of u8 to stderr, adding a newline
///
/// # Panics
/// Panics if writing to stderr fails.
pub fn prerr(data: &[&[u8]]) {
    let stderr = std::io::stderr();
    let mut out = stderr.lock();
    for chunk in data {
        out.write_all(chunk).unwrap();
    }
    out.write_all(b"\n").unwrap();
}
/// print a bunch of u8 to stderr
///
/// No newline is added.
///
/// # Panics
/// Panics if writing to stderr fails.
pub fn prerr_n(data: &[&[u8]]) {
    let stderr = std::io::stderr();
    let mut out = stderr.lock();
    for chunk in data {
        out.write_all(chunk).unwrap();
    }
}
/*
fn bytes_equal(c1: u8, c2: u8, ic: bool) -> bool {
if c1 == c2 {
return true;
}
if c1 == b'?' {
return true;
}
if !ic {
return false;
}
c1.to_ascii_lowercase() == c2.to_ascii_lowercase()
}
fn chars_equal(c1: char, c2: char, ic: bool) -> bool {
if c1 == c2 {
return true;
}
if c1 == '?' {
return true;
}
if !ic {
return false;
}
c1.to_lowercase().eq(c2.to_lowercase())
}
*/
/*
/// match in glob format
pub fn bglob(mut wild: &[u8], mut buff: &[u8], ic: bool) -> bool {
while !buff.is_empty() && !wild.is_empty() && (wild[0] != b'*') {
if !bytes_equal(wild[0], buff[0], ic) {
return false;
}
wild = &wild[1..];
buff = &buff[1..];
}
if wild.is_empty() && !buff.is_empty() {
return false;
}
let mut cp: &[u8] = &[];
let mut mp: &[u8] = &[];
while !buff.is_empty() {
if !wild.is_empty() && (wild[0] == b'*') {
wild = &wild[1..];
if wild.is_empty() {
return true;
}
mp = wild;
cp = &buff[1..];
} else if !wild.is_empty() && bytes_equal(wild[0], buff[0], ic) {
wild = &wild[1..];
buff = &buff[1..];
} else {
wild = mp;
cp = &cp[1..];
buff = cp;
}
}
while !wild.is_empty() && (wild[0] == b'*') {
wild = &wild[1..];
}
wild.is_empty()
}
fn first(s: &str) -> char {
debug_assert!(!s.is_empty());
s.chars().next().unwrap()
}
*/
//fn first_len(s : &str) -> usize {
// s.chars().next().unwrap().len_utf8()
//}
/*
fn skip_first(s: &str) -> &str {
debug_assert!(!s.is_empty());
&s[s.chars().next().unwrap().len_utf8()..]
}
fn take_first(s: &mut &str) -> char {
debug_assert!(!s.is_empty());
let x = s.chars().next().unwrap();
*s = &s[x.len_utf8()..];
x
}
/// match in glob format
pub fn sglob(mut wild: &str, mut buff: &str, ic: bool) -> bool {
while !buff.is_empty() && !wild.is_empty() && (first(wild) != '*') {
if !chars_equal(first(wild), first(buff), ic) {
return false;
}
wild = skip_first(wild);
buff = skip_first(buff);
}
if wild.is_empty() && !buff.is_empty() {
return false;
}
let mut cp: &str = "";
let mut mp: &str = "";
while !buff.is_empty() {
if !wild.is_empty() && (first(wild) == '*') {
wild = &wild[1..];
if wild.is_empty() {
return true;
}
mp = wild;
cp = skip_first(buff);
} else if !wild.is_empty() && chars_equal(first(wild), first(buff), ic) {
wild = skip_first(wild);
buff = skip_first(buff);
} else {
wild = mp;
cp = skip_first(cp);
buff = cp;
}
}
while !wild.is_empty() && (first(wild) == '*') {
wild = skip_first(wild);
}
wild.is_empty()
}
*/
/// How to combine headers from multiple sources
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum HeaderMode {
    /// First can be anything, others must match
    Match,
    /// First must be CDX, others must match
    Require,
    /// Any CDX are removed
    Strip,
    /// No CDX allowed
    None,
    /// First can be anything, others blindly accepted
    Trust,
    /// Ignore CDX if present
    Ignore,
}
/// strings associated with HeaderMode, one per variant, in declaration order
pub const HEADER_MODE: [&str; 6] = ["Match", "Require", "Strip", "None", "Trust", "Ignore"];
impl FromStr for HeaderMode {
    type Err = Error;
    /// Parse a mode name, ignoring ASCII case.
    ///
    /// Accepts `match`, `require`, `strip`, `none`, `trust` and `ignore`;
    /// anything else is an error that lists all six accepted names.
    fn from_str(spec: &str) -> Result<Self> {
        // lowercase once, not once per comparison
        match spec.to_ascii_lowercase().as_str() {
            "match" => Ok(Self::Match),
            "require" => Ok(Self::Require),
            "strip" => Ok(Self::Strip),
            "none" => Ok(Self::None),
            "trust" => Ok(Self::Trust),
            "ignore" => Ok(Self::Ignore),
            _ => err!(
                "Input Header Mode must be one of Match, Require, Strip, None, Trust or Ignore : {}",
                spec
            ),
        }
    }
}
impl Default for HeaderMode {
/// The default mode is `Match`.
fn default() -> Self {
Self::Match
}
}
/// Object to enforce HeaderMode
///
/// The derived `Default` starts with the default mode, an empty stored
/// header, and `saw_one` false.
#[derive(Debug, Default, Clone)]
pub struct HeaderChecker {
/// the mode to enforce
pub mode: HeaderMode,
/// the first header seen
head: Vec<u8>,
/// true after first header has been processed
saw_one: bool,
}
/// Is this line a CDX header?
///
/// Only the four-byte prefix `" CDX"` is examined.
pub fn is_cdx(data: &[u8]) -> bool {
    // check for more validity?
    matches!(data, [b' ', b'C', b'D', b'X', ..])
}
/// Decide whether `data_in` is a CDX header line, validating it when the
/// mode cares about its contents.
///
/// Returns `Ok(false)` in `Ignore` mode or when the line lacks the `" CDX"`
/// prefix. In `Strip` and `None` modes the prefix alone gives `Ok(true)`.
/// Otherwise the byte after the prefix is taken as the delimiter and every
/// column name is checked: non-empty, starting with an alphabetic
/// character, and made only of alphanumerics and underscores. `fname` is
/// used only in error messages.
fn is_valid_cdx(data_in: &[u8], mode: HeaderMode, fname: &str) -> Result<bool> {
    if mode == HeaderMode::Ignore || !data_in.starts_with(b" CDX") {
        return Ok(false);
    }
    if matches!(mode, HeaderMode::Strip | HeaderMode::None) {
        return Ok(true);
    }
    // drop one trailing newline, if present
    let data = data_in.strip_suffix(b"\n").unwrap_or(data_in);
    if data.len() < 6 {
        return err!("File {} has an oddly truncated header line", fname);
    }
    let delim = data[4];
    if delim == b'\n' || delim.is_ascii_alphanumeric() || !delim.is_ascii() {
        return err!("Header for file {} has an invalid column delimiter", fname);
    }
    let names = str::from_utf8(&data[5..])?;
    let delim = char::from(delim);
    for name in names.split(delim) {
        if name.is_empty() {
            return err!("File {} has an empty column name", fname);
        }
        if !name.first().is_alphabetic() {
            return err!("Header for file {} has column name {} which does not start with an alphabetic character.", fname, name);
        }
        if name.chars().any(|ch| !(ch.is_alphanumeric() || ch == '_')) {
            return err!("Header for file {} has column name {} which contains something other than alphnumeric and underscore.", fname, name);
        }
    }
    Ok(true)
}
impl HeaderChecker {
/// new: a checker in its default state (same as `Self::default()`)
pub fn new() -> Self {
Self::default()
}
/// new with mode: every other field keeps its default value
pub fn from_mode(mode: HeaderMode) -> Self {
Self {
mode,
..Self::default()
}
}
/// call for the first line of every input file
/// return true if the header should be written
///
/// The file's header text (or the placeholder `fake` when it has none) is
/// passed to `check`. The result is true only when the file has a header
/// and no header had been seen before this call.
pub fn check_file(&mut self, file: &Reader, fname: &str) -> Result<bool> {
    let is_first = !self.saw_one;
    let has_header = file.has_header();
    let line: &[u8] = if has_header {
        file.header().line.as_bytes()
    } else {
        b"fake"
    };
    self.check(line, fname)?;
    Ok(has_header && is_first)
}
More examples
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
/// Print `file` to stdout with rows and columns swapped.
///
/// Up to `max_lines` lines are loaded. One output line is then written per
/// header column: the column name (only when the file has a header),
/// followed by that column's value from each loaded line, tab-separated.
/// When `head` is set, `" CDX\t"` is written once before the first line.
pub fn transpose(file: &str, head: bool, max_lines: usize) -> Result<()> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(());
    }
    let mut rows = Vec::new();
    for _ in 0..max_lines {
        rows.push(f.curr().clone());
        if f.getline()? {
            break;
        }
    }
    let mut out = get_writer("-")?;
    if head {
        out.write_all(b" CDX\t")?;
    }
    let named = f.has_header();
    for col in 0..f.header().len() {
        if named {
            out.write_all(f.header()[col].as_bytes())?;
        }
        // a tab is needed before a value only once something precedes it
        let mut need_tab = named;
        for row in &rows {
            if need_tab {
                out.write_all(b"\t")?;
            }
            need_tab = true;
            out.write_all(&row[col])?;
        }
        out.write_all(b"\n")?;
    }
    Ok(())
}
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
/// Merge all of `in_files` into `w`, using a binary heap of reader indices.
///
/// With no input files nothing is written. A single file without `unique`
/// is copied through verbatim. Otherwise every file is opened, the header
/// of the first file (if any) is written, and lines are emitted in the
/// order produced by the heap, whose comparison is delegated to
/// `MergeContext::compare`. With `unique`, a line is written only when
/// `MergeContext::equal` reports it differs from the last line written.
/// `_tmp` is unused.
pub fn merge_t2(
&self,
in_files: &[String],
cmp: &mut LineCompList,
mut w: impl Write,
unique: bool,
_tmp: &TempDir,
) -> Result<()> {
if in_files.is_empty() {
return Ok(());
}
// a single file with nothing to deduplicate is a plain copy
if in_files.len() == 1 && !unique {
let r = get_reader(&in_files[0])?;
return copy(r.0, w);
}
// the context is shared between this function and the heap's comparison closure
let mc = Rc::new(RefCell::new(MergeContext{open : Vec::with_capacity(in_files.len()), cmp}));
let mut heap = BinaryHeap::new_by(|a: &usize, b: &usize| mc.borrow_mut().compare(*a, *b));
{
// scoped so the mutable borrow is released before the heap is used
let mut mcm = mc.borrow_mut();
for x in in_files {
mcm.open.push(Reader::new_open(x)?);
}
// skip column splitting when the comparator does not need it
if !mcm.cmp.need_split() {
for x in &mut mcm.open {
x.do_split(false);
}
}
// FIXME -- Check Header
// only the first file's header is written
if mcm.open[0].has_header() {
w.write_all(mcm.open[0].header().line.as_bytes())?;
}
}
// queue every reader that still has a line to offer
for i in 0..in_files.len() {
if !mc.borrow().open[i].is_done() {
heap.push(i)
}
}
if unique {
if heap.is_empty() {
return Ok(());
}
// the first line is always written and becomes the comparison baseline
let first = heap.pop().unwrap();
let mut prev = mc.borrow().open[first].curr_line().clone();
// getline() returns true at end of input; re-queue only while lines remain
if !mc.borrow_mut().open[first].getline()? {
heap.push(first);
}
w.write_all(prev.line())?;
while !heap.is_empty() {
if let Some(x) = heap.pop() {
let eq = mc.borrow_mut().equal(&prev, x);
if !eq {
let mcm = mc.borrow();
w.write_all(mcm.open[x].curr_line().line())?;
prev.assign(mcm.open[x].curr_line());
}
if !mc.borrow_mut().open[x].getline()? {
heap.push(x);
}
}
}
} else {
while !heap.is_empty() {
if let Some(x) = heap.pop() {
w.write_all(mc.borrow_mut().open[x].curr_line().line())?;
if !mc.borrow_mut().open[x].getline()? {
heap.push(x);
}
}
}
}
Ok(())
}
/// merge all the files into w, using tmp
///
/// With no input files nothing is written. A single file without `unique`
/// is copied through verbatim. Otherwise every file is opened, the header
/// of the first file (if any) is written, and a `MergeTreeItem` tree yields
/// the index of the reader holding the next line until it is exhausted.
/// With `unique`, a line is written only when `cmp.equal_cols` reports it
/// differs from the previous line produced. `_tmp` is unused.
pub fn merge_t1(
    &self,
    in_files: &[String],
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
    _tmp: &TempDir,
) -> Result<()> {
    if in_files.is_empty() {
        return Ok(());
    }
    if !unique && in_files.len() == 1 {
        let r = get_reader(&in_files[0])?;
        return copy(r.0, w);
    }
    let mut open_files: Vec<Reader> = Vec::with_capacity(in_files.len());
    for name in in_files {
        open_files.push(Reader::new_open(name)?);
    }
    if !cmp.need_split() {
        for reader in &mut open_files {
            reader.do_split(false);
        }
    }
    // FIXME -- Check Header
    if open_files[0].has_header() {
        w.write_all(open_files[0].header().line.as_bytes())?;
    }
    let nums: Vec<usize> = (0..open_files.len()).collect();
    let mut mm = MergeTreeItem::new_tree(&open_files, &nums);
    if unique {
        // the first line is always written and seeds the comparison
        let first = match mm.next(cmp, &mut open_files)? {
            Some(idx) => idx,
            None => return Ok(()),
        };
        w.write_all(open_files[first].curr_line().line())?;
        let mut prev = open_files[first].curr_line().clone();
        while let Some(idx) = mm.next(cmp, &mut open_files)? {
            let line = open_files[idx].curr_line();
            if !cmp.equal_cols(&prev, line) {
                w.write_all(line.line())?;
            }
            prev.assign(line);
        }
    } else {
        while let Some(idx) = mm.next(cmp, &mut open_files)? {
            w.write_all(open_files[idx].curr_line().line())?;
        }
    }
    Ok(())
}
/// merge all the files into w
///
/// Creates a temporary directory named `merge`, then dispatches to
/// `merge_t1` when `alt_merge` is set and to `merge_t2` otherwise.
pub fn merge(&self, files: &[String], cmp: &mut LineCompList, w: impl Write, unique: bool) -> Result<()> {
    let tmp = TempDir::new("merge")?;
    if !self.alt_merge {
        return self.merge_t2(files, cmp, w, unique, &tmp);
    }
    self.merge_t1(files, cmp, w, unique, &tmp)
}
/// given two file names, merge them into output
///
/// Both files are read without column splitting and compared as whole
/// lines with `cmp`, which is bound to the left file's column names. The
/// left file's header (if any) is written first.
///
/// Without `unique`, equal lines from both files are both written, the
/// right one first. With `unique`, the line just written is remembered in
/// `prev`, and following lines of either file that compare equal to it are
/// skipped.
///
/// # Errors
/// Propagates failures from opening, reading, column lookup or writing.
pub fn merge_2(
    &self,
    left: &str,
    right: &str,
    cmp: &mut LineCompList,
    mut w: impl Write,
    unique: bool,
) -> Result<()> {
    let mut left_file = Reader::new();
    let mut right_file = Reader::new();
    left_file.open(left)?;
    right_file.open(right)?;
    left_file.do_split(false);
    right_file.do_split(false);
    cmp.lookup(&left_file.names())?;
    // FIXME -- Check Header
    if left_file.has_header() {
        w.write_all(left_file.header().line.as_bytes())?;
    }
    if unique {
        let mut prev: Vec<u8> = Vec::new();
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            // After each write, `prev` takes over the bytes of the line that
            // was just written; the reader's slot is refilled by getline().
            match ord {
                Ordering::Less => {
                    left_file.write(&mut w)?;
                    mem::swap(&mut prev, left_file.curr_mut().raw());
                    left_file.getline()?;
                }
                Ordering::Greater => {
                    right_file.write(&mut w)?;
                    // remember the RIGHT line, since that is what was written
                    mem::swap(&mut prev, right_file.curr_mut().raw());
                    right_file.getline()?;
                }
                Ordering::Equal => {
                    left_file.write(&mut w)?;
                    mem::swap(&mut prev, left_file.curr_mut().raw());
                    left_file.getline()?;
                    right_file.getline()?;
                }
            }
            while !left_file.is_done() && cmp.equal_lines(left_file.curr().line(), &prev) {
                left_file.getline()?;
            }
            while !right_file.is_done() && cmp.equal_lines(right_file.curr().line(), &prev) {
                right_file.getline()?;
            }
        }
    } else {
        while !left_file.is_done() && !right_file.is_done() {
            let ord = cmp.comp_lines(left_file.curr().line(), right_file.curr().line());
            // if Equal, write both lines
            if ord != Ordering::Less {
                right_file.write(&mut w)?;
                right_file.getline()?;
            }
            if ord != Ordering::Greater {
                left_file.write(&mut w)?;
                left_file.getline()?;
            }
        }
    }
    // NOTE(review): the remainder of the longer file is copied as-is, so in
    // `unique` mode duplicates inside that tail are not removed -- confirm
    // whether that is intended.
    while !left_file.is_done() {
        left_file.write(&mut w)?;
        left_file.getline()?;
    }
    while !right_file.is_done() {
        right_file.write(&mut w)?;
        right_file.getline()?;
    }
    Ok(())
}
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
/// Print the first screenful of `file` to stdout as an aligned table.
///
/// At most `screen.height` rows (the header row, if any, counts as one) are
/// read. Each column is as wide as its widest cell, then columns are shrunk
/// with `dec_max` until the table fits in `screen.width`, allowing one
/// separator per column. The header row is centred; data rows are
/// left-aligned and padded with spaces.
///
/// Rows with more columns than the header are tolerated: the width table
/// grows to cover them instead of panicking on an out-of-range index.
///
/// # Errors
/// Propagates failures from opening or reading `file` and from writing.
pub fn show(file: &str, screen: &Rect) -> Result<()> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(());
    }
    let mut lines: Vec<StringLine> = Vec::new();
    let mut sizes: Vec<usize> = vec![0; f.header().len()];
    if f.has_header() {
        lines.push(f.header().clone());
    }
    if !f.is_done() {
        while lines.len() < screen.height {
            let mut s = StringLine::new();
            s.line = String::from_utf8_lossy(f.curr().line()).to_string();
            s.split(f.delim());
            lines.push(s);
            if f.getline()? {
                break;
            }
        }
    }
    // Widest display width seen in each column.
    for x in &lines {
        for (i, c) in x.iter().enumerate() {
            let width = UnicodeWidthStr::width(c);
            if i >= sizes.len() {
                // ragged row: more cells than the header declared
                sizes.resize(i + 1, 0);
            }
            if sizes[i] < width {
                sizes[i] = width;
            }
        }
    }
    // One cell of every column is reserved for the separator; a screen
    // narrower than the column count simply leaves no room for content.
    let target = screen.width.saturating_sub(sizes.len());
    let mut total: usize = sizes.iter().sum();
    while total > target {
        dec_max(&mut sizes);
        let shrunk: usize = sizes.iter().sum();
        if shrunk >= total {
            // dec_max made no progress; stop rather than loop forever
            break;
        }
        total = shrunk;
    }
    let mut w = get_writer("-")?;
    // Only the first row is centred, and only when it is a header.
    let mut do_center = f.has_header();
    for x in &lines {
        let mut need_space = false;
        for (c, y) in x.iter().enumerate() {
            let (nstr, width) = y.unicode_truncate(sizes[c]);
            if need_space {
                w.write_all(b" ")?;
            }
            let num = (sizes[c] - width) / 2;
            if do_center {
                for _ in 0..num {
                    w.write_all(b" ")?;
                }
            }
            w.write_all(nstr.as_bytes())?;
            if do_center {
                // whatever padding is left after the leading half
                let num2 = (sizes[c] - width) - num;
                for _ in 0..num2 {
                    w.write_all(b" ")?;
                }
            } else {
                for _ in width..sizes[c] {
                    w.write_all(b" ")?;
                }
            }
            need_space = true;
        }
        do_center = false;
        w.write_all(b"\n")?;
    }
    Ok(())
}
/// show the file in a specific rectangle
///
/// Works like `show`, but the formatted rows replace the contents of `w`
/// (one `String` per row, no trailing newline) instead of being printed.
/// Returns the sum of the final column widths plus one per column; an empty
/// file returns 0 and leaves `w` untouched.
///
/// Rows with more columns than the header are tolerated: the width table
/// grows to cover them instead of panicking on an out-of-range index.
///
/// # Errors
/// Propagates failures from opening or reading `file`.
pub fn show2(file: &str, screen: &Rect, w: &mut Vec<String>) -> Result<usize> {
    let mut f = Reader::new();
    f.open(file)?;
    if f.is_empty() {
        return Ok(0);
    }
    let mut lines: Vec<StringLine> = Vec::new();
    let mut sizes: Vec<usize> = vec![0; f.header().len()];
    if f.has_header() {
        lines.push(f.header().clone());
    }
    if !f.is_done() {
        while lines.len() < screen.height {
            let mut s = StringLine::new();
            s.line = String::from_utf8_lossy(f.curr().line()).to_string();
            s.split(f.delim());
            lines.push(s);
            if f.getline()? {
                break;
            }
        }
    }
    // Widest display width seen in each column.
    for x in &lines {
        for (i, c) in x.iter().enumerate() {
            let width = UnicodeWidthStr::width(c);
            if i >= sizes.len() {
                // ragged row: more cells than the header declared
                sizes.resize(i + 1, 0);
            }
            if sizes[i] < width {
                sizes[i] = width;
            }
        }
    }
    // One cell of every column is reserved for the separator; a screen
    // narrower than the column count simply leaves no room for content.
    let target = screen.width.saturating_sub(sizes.len());
    let mut total: usize = sizes.iter().sum();
    while total > target {
        dec_max(&mut sizes);
        let shrunk: usize = sizes.iter().sum();
        if shrunk >= total {
            // dec_max made no progress; stop rather than loop forever
            break;
        }
        total = shrunk;
    }
    // Only the first row is centred, and only when it is a header.
    let mut do_center = f.has_header();
    w.clear();
    for x in &lines {
        let mut s = String::new();
        let mut need_space = false;
        for (c, y) in x.iter().enumerate() {
            let (nstr, width) = y.unicode_truncate(sizes[c]);
            if need_space {
                s.push(' ');
            }
            let num = (sizes[c] - width) / 2;
            if do_center {
                for _ in 0..num {
                    s.push(' ');
                }
            }
            s.push_str(nstr);
            if do_center {
                // whatever padding is left after the leading half
                let num2 = (sizes[c] - width) - num;
                for _ in 0..num2 {
                    s.push(' ');
                }
            } else {
                for _ in width..sizes[c] {
                    s.push(' ');
                }
            }
            need_space = true;
        }
        do_center = false;
        w.push(s);
    }
    Ok(sizes.iter().sum::<usize>() + sizes.len())
}
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
/// Entry point of the column-selection tool.
///
/// Every `--fields`, `--group`, `--expr` and `--composite` argument appends
/// one output column source to the writer, in command-line order; `--dups`
/// sets how duplicate column names are handled. For each non-empty input
/// file the selected columns of every line are written to stdout. The
/// header is emitted only when `settings.checker.check` returns true.
///
/// # Errors
/// Fails when no column source was given, or when argument parsing, column
/// lookup, reading or writing fails.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    let prog = args::ProgSpec::new("Select columns", args::FileCount::Many);
    const A: [ArgSpec; 5] = [
        arg! {"fields", "f", "Columns", "the columns to select."},
        arg! {"group", "g", "Columns", "the columns in a bunch, e.g. '.group:1-3'"},
        arg! {"expr", "e", "Name:Expr", "The result of an arithmetic expression"},
        arg! {"composite", "c", "Spec", "new value made from parts. e.g. 'stuff:abc^{two}def'"},
        arg_enum! {"dups", "D", "Mode", "Duplicate Column Handling", &["Fail", "Allow", "Numeric"]},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut header = ColumnHeader::new();
    let mut v = Writer::new(b'\t');
    for x in args {
        if x.name == "dups" {
            header.set_handling(DupColHandling::new(&x.value)?);
        } else if x.name == "fields" {
            v.push(Box::new(ReaderColumns::new(ColumnSet::from_spec(
                &x.value,
            )?)));
        } else if x.name == "group" {
            v.push(Box::new(ColumnClump::from_spec(&x.value)?));
        } else if x.name == "expr" {
            v.push(Box::new(ColumnExpr::new(&x.value)?));
        } else if x.name == "composite" {
            v.push(Box::new(CompositeColumn::new(&x.value)?));
        } else {
            unreachable!();
        }
    }
    if v.is_empty() {
        // name the options this tool actually accepts
        bail!("cut requires at least one of --fields, --group, --expr or --composite");
    }
    let mut w = get_writer("-")?;
    let mut not_header = String::new();
    for x in &files {
        let mut f = Reader::new();
        f.open(x)?;
        if f.is_empty() {
            continue;
        }
        v.lookup(&f.names())?;
        header.clear();
        not_header.clear();
        v.add_names(&mut header, f.header())?;
        if f.has_header() {
            not_header = header.get_head(b'\t');
        }
        if settings.checker.check(not_header.as_bytes(), x)? {
            w.write_all(not_header.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }
        // getline() returns true once the end of the file is reached
        loop {
            v.write(&mut w.0, f.curr())?;
            if f.getline()? {
                break;
            }
        }
    }
    Ok(())
}
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
/// Entry point of the pattern-selection tool.
///
/// Each `--pattern` adds a matcher. Matchers are combined with AND unless
/// `--or` is given, and `--invert` flips the final decision. Every
/// `--location` adds a leading output column of location context.
/// `--show-const` and `--show-func` print reference information and return
/// immediately. For each non-empty input file, every selected line is
/// written to stdout, preceded by its location columns. The header is
/// emitted only when `settings.checker.check` returns true.
///
/// # Errors
/// Propagates failures from argument parsing, pattern or location parsing,
/// column lookup, reading and writing.
pub fn main(argv: &[String], settings: &mut Settings) -> Result<()> {
    // The description previously read "Select uniq lines.", copied from
    // another tool; every option here is about pattern matching.
    let prog = args::ProgSpec::new("Select lines matching patterns.", args::FileCount::Many);
    const A: [ArgSpec; 6] = [
        arg! {"pattern", "p", "Col,Spec,Pattern", "Select line where this col matches this pattern."},
        arg! {"show-const", "", "", "Print available constants"},
        arg! {"show-func", "", "", "Print available functions"},
        arg! {"or", "o", "", "A line matches if any of the matchers matches."},
        arg! {"invert", "v", "", "Print lines that don't match."},
        arg! {"location", "l", "name:what", "prefix extra columns of location context."},
    ];
    let (args, files) = args::parse(&prog, &A, argv, settings)?;
    let mut list = LineMatcherList::new_with(Combiner::And);
    let mut reverse = false;
    let mut loc = FileLocList::new();
    for x in args {
        if x.name == "pattern" {
            list.push(&x.value)?;
        } else if x.name == "or" {
            list.multi = Combiner::Or;
        } else if x.name == "invert" {
            reverse = true;
        } else if x.name == "location" {
            loc.push(&x.value)?;
        } else if x.name == "show-const" {
            expr::show_const();
            return Ok(());
        } else if x.name == "show-func" {
            expr::show_func();
            return Ok(());
        } else {
            unreachable!();
        }
    }
    let mut w = get_writer("-")?;
    for x in &files {
        let mut f = Reader::new_open(x)?;
        if f.is_empty() {
            continue;
        }
        list.lookup(&f.names())?;
        let mut not_header = String::new();
        let mut header = ColumnHeader::new();
        // location columns come first, then the file's own columns
        loc.add(&mut header)?;
        header.push_all(f.header())?;
        if f.has_header() {
            not_header = header.get_head(b'\t');
        }
        if settings.checker.check(not_header.as_bytes(), x)? {
            w.write_all(not_header.as_bytes())?;
        }
        if f.is_done() {
            continue;
        }
        // getline() returns true once the end of the file is reached
        loop {
            if list.ok(f.curr_line()) ^ reverse {
                // write previous lines of context if necessary
                loc.write_data(&mut w.0, b'\t', f.loc())?;
                f.write_curr(&mut w.0)?;
            } else {
                // write more lines of context if necessary
            }
            if f.getline()? {
                break;
            }
        }
    }
    Ok(())
}
Trait Implementations
Auto Trait Implementations
impl !RefUnwindSafe for Reader
impl !Send for Reader
impl !Sync for Reader
impl Unpin for Reader
impl !UnwindSafe for Reader
Blanket Implementations
sourceimpl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
const: unstable · sourcefn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
sourceimpl<T> Instrument for T
impl<T> Instrument for T
sourcefn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
sourcefn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
impl<V, T> VZip<V> for T where
V: MultiLane<T>,
impl<V, T> VZip<V> for T where
V: MultiLane<T>,
fn vzip(self) -> V
sourceimpl<T> WithSubscriber for T
impl<T> WithSubscriber for T
sourcefn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where
S: Into<Dispatch>,
fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where
S: Into<Dispatch>,
Attaches the provided Subscriber
to this type, returning a
WithDispatch
wrapper. Read more
sourcefn with_current_subscriber(self) -> WithDispatch<Self>
fn with_current_subscriber(self) -> WithDispatch<Self>
Attaches the current default Subscriber
to this type, returning a
WithDispatch
wrapper. Read more