2020-08-20 17:08:02 +00:00
|
|
|
|
import pytest
|
|
|
|
|
import spacy
|
|
|
|
|
|
|
|
|
|
# Sample English passage that the spaCy pipeline is run on.
# The curly quotes and the curly apostrophe are part of the input text and
# must be kept exactly as written.
en_text = (
    "When Sebastian Thrun started working on self-driving cars at "
    "Google in 2007, few people outside of the company took him "
    "seriously. “I can tell you very senior CEOs of major American "
    "car companies would shake my hand and turn away because I wasn’t "
    "worth talking to,” said Thrun, in an interview with Recode earlier "
    "this week."
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(scope="session")
def en_core_web_sm():
    """Load the ``en_core_web_sm`` spaCy pipeline.

    The fixture is session-scoped so ``spacy.load`` runs once per test run
    rather than once for every test that requests it, directly or through
    another fixture.

    Returns:
        Whatever ``spacy.load("en_core_web_sm")`` returns.
    """
    return spacy.load("en_core_web_sm")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def doc_en_core_web_sm(en_core_web_sm):
    """Run the supplied pipeline over ``en_text`` and hand back the result.

    Args:
        en_core_web_sm: Callable pipeline provided by the fixture of the
            same name.
    """
    nlp = en_core_web_sm
    parsed = nlp(en_text)
    return parsed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_entities(doc_en_core_web_sm):
    """Check the named entities found in the parsed sample text.

    Every entity is reduced to a ``(text, label_)`` pair and the pairs are
    compared, in the order ``ents`` yields them, against the expected list.

    Args:
        doc_en_core_web_sm: Parsed document provided by the fixture of the
            same name.
    """
    entities = [(ent.text, ent.label_) for ent in doc_en_core_web_sm.ents]

    # NOTE(review): these pairs look pinned to the output of one model
    # version (e.g. 'Recode' labelled PERSON) -- confirm after any upgrade.
    assert entities == [
        ('Sebastian Thrun', 'PERSON'),
        ('2007', 'DATE'),
        ('American', 'NORP'),
        ('Thrun', 'PERSON'),
        ('Recode', 'PERSON'),
        ('earlier this week', 'DATE'),
    ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_nouns(doc_en_core_web_sm):
    """Check the text of each noun chunk in the parsed sample, in order.

    Args:
        doc_en_core_web_sm: Parsed document provided by the fixture of the
            same name.
    """
    found_chunks = [span.text for span in doc_en_core_web_sm.noun_chunks]
    expected_chunks = [
        'Sebastian Thrun',
        'self-driving cars',
        'Google',
        'few people',
        'the company',
        'him',
        'I',
        'you',
        'very senior CEOs',
        'major American car companies',
        'my hand',
        'I',
        'Thrun',
        'an interview',
        'Recode',
    ]
    assert found_chunks == expected_chunks
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_verbs(doc_en_core_web_sm):
    """Check the lemma of each token tagged ``VERB`` in the parsed sample.

    Args:
        doc_en_core_web_sm: Parsed document provided by the fixture of the
            same name.
    """
    verb_lemmas = [
        tok.lemma_ for tok in doc_en_core_web_sm if tok.pos_ == "VERB"
    ]
    expected_lemmas = [
        'start',
        'work',
        'drive',
        'take',
        'tell',
        'shake',
        'turn',
        'be',
        'talk',
        'say',
    ]
    assert verb_lemmas == expected_lemmas
|