from janome.tokenizer import Tokenizer

tokenizer = Tokenizer()

def tokenize_japanese(text):
    """Extract noun keywords from Japanese text, printing each token along the way."""
    tokens = []
    print(f"\nAnalyzing: {text}")
    print("-" * 20)
    for token in tokenizer.tokenize(text):
        pos = token.part_of_speech.split(',')[0]
        print(f"Token: {token.surface} | POS: {pos}")
        if pos == '名詞':  # keep only nouns
            tokens.append(token.surface)
    return tokens
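
# The two questions below differ only in their verbs (聞きたい vs. 話したい), which a
# noun-only filter throws away. A minimal sketch of one way to also keep verb
# lemmas via Janome's base_form attribute; the helper name tokenize_with_verbs
# is mine, not part of the original script:
def tokenize_with_verbs(text):
    keywords = []
    for token in tokenizer.tokenize(text):
        pos = token.part_of_speech.split(',')[0]
        if pos == '名詞':
            keywords.append(token.surface)    # nouns: keep the surface form
        elif pos == '動詞':
            keywords.append(token.base_form)  # verbs: keep the dictionary form, e.g. 聞く
    return keywords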

q1 = "ノアについて聞きたい。"  # "I want to ask about Noa."
q2 = "ノアについて話したい。"  # "I want to talk about Noa."

t1 = tokenize_japanese(q1)
print(f"\nKeywords for Q1: {t1}")

t2 = tokenize_japanese(q2)
print(f"Keywords for Q2: {t2}")

# Simulate coverage: what fraction of Q1's keywords also appear in Q2?
common = set(t1).intersection(set(t2))
coverage = len(common) / len(t1) if t1 else 0
print(f"\nCoverage: {coverage * 100}%")
