depot/third_party/nixpkgs/pkgs/development/python-modules/spacy/annotation-test/annotate.py

70 lines
1.7 KiB
Python
Raw Normal View History

import pytest
import spacy
en_text = (
"When Sebastian Thrun started working on self-driving cars at "
"Google in 2007, few people outside of the company took him "
"seriously. “I can tell you very senior CEOs of major American "
"car companies would shake my hand and turn away because I wasnt "
"worth talking to,” said Thrun, in an interview with Recode earlier "
"this week.")
@pytest.fixture
def en_core_web_sm():
return spacy.load("en_core_web_sm")
@pytest.fixture
def doc_en_core_web_sm(en_core_web_sm):
return en_core_web_sm(en_text)
def test_entities(doc_en_core_web_sm):
entities = list(map(lambda e: (e.text, e.label_),
doc_en_core_web_sm.ents))
assert entities == [
('Sebastian Thrun', 'PERSON'),
('Google', 'ORG'), ('2007', 'DATE'),
('American', 'NORP'),
('Thrun', 'ORG'),
('earlier this week', 'DATE')
]
def test_nouns(doc_en_core_web_sm):
assert [
chunk.text for chunk in doc_en_core_web_sm.noun_chunks] == [
'Sebastian Thrun',
'self-driving cars',
'Google',
'few people',
'the company',
'him',
'I',
'you',
'very senior CEOs',
'major American car companies',
'my hand',
'I',
'Thrun',
'an interview',
'Recode']
def test_verbs(doc_en_core_web_sm):
assert [
token.lemma_ for token in doc_en_core_web_sm if token.pos_ == "VERB"] == [
'start',
'work',
'drive',
'take',
'can',
'tell',
'would',
'shake',
'turn',
'talk',
'say']