| |
| |
| |
| |
| |
| |
| |
| |
| |
| import spacy |
| |
| |
def check_word_property():
    """Interactively print the nouns and verbs of sentences typed by the user.

    Loads the ``en_core_web_sm`` spaCy pipeline once, then repeatedly prompts
    for a sentence on standard input and prints the sentence followed by the
    words whose ``pos_`` tag is ``'NOUN'`` and those whose tag is ``'VERB'``.

    The loop stops when an empty line is entered or when standard input is
    exhausted (EOF).
    """
    nlp = spacy.load("en_core_web_sm")
    while True:
        try:
            input_sentence = input('Please input a sentence: ')
        except EOFError:
            # input() raises EOFError on Ctrl-D or exhausted piped stdin;
            # treat it like an empty line instead of crashing the caller.
            break
        if not input_sentence:
            break

        doc = nlp(input_sentence)
        # Filter the tokens directly; no intermediate word/tag records needed.
        noun_list = [w.text for w in doc if w.pos_ == 'NOUN']
        verb_list = [w.text for w in doc if w.pos_ == 'VERB']

        print('Input sentence: %s' % input_sentence)
        print('Noun list: %s' % ', '.join(noun_list))
        print('Verb list: %s\n' % ', '.join(verb_list))
| |
| |
def check_en_word_similarity():
    """Print vector attributes and pairwise similarities of a few English words.

    Runs the ``en_core_web_lg`` pipeline over ``'dog cat banana ssafsf'``,
    prints ``text``, ``has_vector``, ``vector_norm`` and ``is_oov`` for every
    token, then prints the similarity of every ordered pair of tokens (each
    token is also compared with itself).
    """
    pipeline = spacy.load('en_core_web_lg')
    # NOTE(review): 'ssafsf' looks like a deliberately made-up word, presumably
    # to show how an unknown token is reported -- confirm with the author.
    words = pipeline('dog cat banana ssafsf')

    print('Token属性:\n')
    for word in words:
        attributes = (word.text, word.has_vector, word.vector_norm, word.is_oov)
        print(*attributes)

    print('\nToken两两之间关系:\n')
    for left in words:
        for right in words:
            score = left.similarity(right)
            print('%s and %s: %s' % (left, right, score))
| |
| |
def check_en_doc_similarity():
    """Print similarity scores between several short English sentences.

    Uses the ``en_core_web_lg`` pipeline. Four "create ticket" questions are
    parsed first and four pairwise scores are printed (1-2, 2-3, 1-3, 1-4);
    afterwards two "love you" sentences are parsed and their score is printed.
    """
    pipeline = spacy.load('en_core_web_lg')

    ticket_texts = (
        'How to create ticket?',
        'How can I create ticket?',
        'I want to create ticket?',
        'Do you know how to create the ticket?',
    )
    ticket_docs = [pipeline(text) for text in ticket_texts]

    # Zero-based index pairs, in the order the scores are printed.
    for left, right in ((0, 1), (1, 2), (0, 2), (0, 3)):
        print(ticket_docs[left].similarity(ticket_docs[right]))

    love_question = pipeline('Do you know I love you?')
    love_statement = pipeline('I love you so much?')
    print(love_question.similarity(love_statement))
| |
| |
def check_zh_doc_similarity():
    """Print similarity scores between several short Chinese sentences.

    Uses the ``zh_core_web_lg`` pipeline. Four sentences are parsed and four
    pairwise scores are printed, in the order 1-2, 2-3, 1-3, 1-4.
    """
    pipeline = spacy.load('zh_core_web_lg')

    texts = (
        '你好吗?',
        '你还好吗?',
        '今天你还好吗?',
        '你的身体今天还好吗?',
    )
    docs = [pipeline(text) for text in texts]

    # Zero-based index pairs, in the order the scores are printed.
    for left, right in ((0, 1), (1, 2), (0, 2), (0, 3)):
        print(docs[left].similarity(docs[right]))
| |
| |
if __name__ == '__main__':
    # Run each demo in turn: the interactive noun/verb listing first, then the
    # word-level and document-level similarity printouts.
    demos = (
        check_word_property,
        check_en_word_similarity,
        check_zh_doc_similarity,
        check_en_doc_similarity,
    )
    for demo in demos:
        demo()

    print('end!')