40 lines
1.1 KiB
Python
40 lines
1.1 KiB
Python
import sys
|
|
|
|
import click
|
|
|
|
|
|
def read(path):
    """Yield ``(score, second, third)`` records from a tab-separated file.

    Every line is stripped of surrounding whitespace and split on tabs. The
    first field is converted to ``float``; the second and third are passed
    through as strings and any further fields are ignored. (The caller in
    this file treats the first field as a BLEU score and the second as a
    sentence.)

    Lines with fewer than three fields, or whose first field is not a valid
    number (for example a header row), are reported on stderr and skipped
    instead of aborting the read.

    Args:
        path: Location of the tab-separated file to read.

    Yields:
        Tuples ``(float, str, str)`` built from the first three fields.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(path, encoding="utf-8") as handle:
        for raw in handle:
            line = raw.strip()
            try:
                score, second, third, *_ = line.split("\t")
                # Convert inside the try: both a short line and a
                # non-numeric score raise ValueError and are skipped alike.
                record = (float(score), second, third)
            except ValueError:
                print(f"skipping line {line}", file=sys.stderr)
                continue
            yield record
|
|
|
|
|
|
@click.command()
@click.argument("path")
def main(path):
    """Print BLEU averages for sentences above and below the mean length.

    Reads the records from PATH, computes the mean character length of the
    second field, splits the records into those strictly longer than that
    mean and the rest, and prints the count and the mean of the first field
    (the score) for each group as well as overall.

    Exits with an error message if PATH yields no usable records. A group
    that ends up empty is reported as "n/a".
    """
    data = list(read(path))
    if not data:
        # Without any record every average below would divide by zero.
        raise click.ClickException(f"no usable lines found in {path}")

    def mean_score(rows):
        # A group can legitimately be empty (e.g. all sentences share one
        # length, so none is strictly longer than the mean); report that
        # rather than crashing with ZeroDivisionError.
        if not rows:
            return "n/a"
        return sum(row[0] for row in rows) / len(rows)

    avg_len = sum(len(row[1]) for row in data) / len(data)
    long_rows = [row for row in data if len(row[1]) > avg_len]
    short_rows = [row for row in data if len(row[1]) <= avg_len]

    print(f"avg sentence length {avg_len}")
    print(f"long sentences {len(long_rows)}")
    print(f"short sentences {len(short_rows)}")
    print(f"total bleu {mean_score(data)}")
    print(f"longest bleu {mean_score(long_rows)}")
    print(f"shortest bleu {mean_score(short_rows)}")
|
|
|
|
|
|
# Invoke the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|