"""Report average BLEU for sentences above and below the mean sentence length."""

import sys

import click


def read(path):
    """Yield ``(score, sentence, third_field)`` tuples from a tab-separated file.

    Each line is stripped and split on tabs. The first column is converted to
    ``float``; the second and third columns are yielded as strings; any
    further columns are ignored. Lines with fewer than three columns, or
    whose first column is not a valid float, are reported on stderr and
    skipped.

    Args:
        path: Path of the file to read.

    Yields:
        Tuples ``(float, str, str)``, one per usable line.
    """
    with open(path) as handle:
        for raw_line in handle:
            # NOTE: strip() removes surrounding tabs as well as the newline,
            # so a three-column row whose last field is empty ends up with
            # two columns and is treated as malformed.
            line = raw_line.strip()
            try:
                score, sentence, third, *_ = line.split("\t")
                # Converting inside the try means a header row or a
                # non-numeric score is skipped rather than aborting the run.
                score = float(score)
            except ValueError:
                # Raised both by the unpacking (too few columns) and by
                # float(); nothing else is intentionally swallowed.
                print(f"skipping line {line}", file=sys.stderr)
                continue
            yield score, sentence, third


def _mean(values):
    """Return the arithmetic mean of *values*, or NaN when there are none."""
    values = list(values)
    if not values:
        return float("nan")
    return sum(values) / len(values)


@click.command()
@click.argument("path")
def main(path):
    """Print average BLEU for long and short sentences read from PATH.

    Rows are split into "long" (sentence strictly longer than the mean
    sentence length, measured in characters) and "short" (all others). An
    empty group reports its average as nan.
    """
    data = list(read(path))
    if not data:
        # Previously this surfaced as a ZeroDivisionError traceback.
        raise click.ClickException(f"no usable rows found in {path}")

    avg_len = _mean(len(row[1]) for row in data)

    long_rows = []
    short_rows = []
    for row in data:
        if len(row[1]) > avg_len:
            long_rows.append(row)
        else:
            short_rows.append(row)

    print(f"avg sentence length {avg_len}")
    print(f"long sentences {len(long_rows)}")
    print(f"short sentences {len(short_rows)}")
    print(f"total bleu {_mean(row[0] for row in data)}")
    # When every sentence has the same length no row is strictly longer than
    # the mean, so the long group is empty; _mean reports nan for that case
    # instead of dividing by zero.
    print(f"longest bleu {_mean(row[0] for row in long_rows)}")
    print(f"shortest bleu {_mean(row[0] for row in short_rows)}")


if __name__ == "__main__":
    main()