# human-gaze-guided-neural-at.../joint_paraphrase_model/utils/long_sentence_stats.py
#
# 41 lines
# 1.1 KiB
# Python

import sys
import click
def read(path):
    """Yield ``(score, sentence, third)`` records from a tab-separated file.

    Each line is stripped of surrounding whitespace and split on tabs. The
    first column is converted to ``float``; the second and third columns are
    returned as strings, and any further columns are ignored.

    Lines that cannot be parsed (fewer than three columns, or a first column
    that is not a number, e.g. a header row or a blank line) are reported on
    stderr and skipped instead of aborting the whole read.

    Args:
        path: Path of the tab-separated file to read.

    Yields:
        Tuples ``(float, str, str)`` built from the first three columns.
    """
    with open(path) as handle:
        for line in handle:
            line = line.strip()
            try:
                # Both a too-short unpack and a non-numeric score raise
                # ValueError, so one narrow handler covers every malformed row.
                raw_score, sentence, third, *_ = line.split("\t")
                score = float(raw_score)
            except ValueError:
                print(f"skipping line {line}", file=sys.stderr)
                continue
            yield score, sentence, third
def _mean(values):
    """Return the arithmetic mean of *values*, or NaN when there are none."""
    values = list(values)
    if not values:
        return float("nan")
    return sum(values) / len(values)


@click.command()
@click.argument("path")
def main(path):
    """Print BLEU averages for long versus short sentences in PATH.

    Records are loaded with ``read``. Sentence length is the character count
    of the second column. A record counts as "long" when its length is
    strictly greater than the average length over all records, otherwise it
    counts as "short". The average of the first column (reported as BLEU) is
    printed for all records and for each of the two groups.

    Raises:
        click.ClickException: If PATH yields no usable records.
    """
    data = list(read(path))
    if not data:
        # Without any record every average below would divide by zero.
        raise click.ClickException(f"no usable records found in {path}")

    avg_len = _mean(len(row[1]) for row in data)
    long_rows = [row for row in data if len(row[1]) > avg_len]
    short_rows = [row for row in data if len(row[1]) <= avg_len]

    print(f"avg sentence length {avg_len}")
    print(f"long sentences {len(long_rows)}")
    print(f"short sentences {len(short_rows)}")
    print(f"total bleu {_mean(row[0] for row in data)}")
    # A group can be empty (e.g. all sentences share one length, so none is
    # strictly above the average); _mean reports NaN rather than crashing.
    print(f"longest bleu {_mean(row[0] for row in long_rows)}")
    print(f"shortest bleu {_mean(row[0] for row in short_rows)}")
# Script entry point: run the click command only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()