Artificial Intelligence
John Samuel
CPE Lyon
Year: 2021-2022
Email: john(dot)samuel(at)cpe(dot)fr
from nltk.stem.porter import PorterStemmer
words = ["words", "eating", "went", "engineer", "tried"]
porter = PorterStemmer()
for word in words:
    print(porter.stem(word), end=' ')
Output
word eat went engin tri
from nltk.stem.snowball import SnowballStemmer
words = ["words", "eating", "went", "engineer", "tried"]
snowball = SnowballStemmer("english")
for word in words:
    print(snowball.stem(word), end=' ')
Output
word eat went engin tri
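Snowball, unlike Porter, is available for many languages. A quick sketch with the French stemmer (the word list is illustrative):

from nltk.stem.snowball import SnowballStemmer

french = SnowballStemmer("french")
for word in ["mangeait", "écoles", "nationale"]:
    # print each French stem, space-separated
    print(french.stem(word), end=' ')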
from nltk import ngrams
sentence="He went to school yesterday and attended the classes"
for n in range(1,5):
print("\n{}-grams".format(n))
n_grams = ngrams(sentence.split(), n)
for ngram in n_grams:
print(ngram, end=" ")
Output
1-grams
('He',) ('went',) ('to',) ('school',) ('yesterday',) ('and',) ('attended',) ('the',) ('classes',)
2-grams
('He', 'went') ('went', 'to') ('to', 'school') ('school', 'yesterday') ('yesterday', 'and') ('and', 'attended') ('attended', 'the') ('the', 'classes')
3-grams
('He', 'went', 'to') ('went', 'to', 'school') ('to', 'school', 'yesterday') ('school', 'yesterday', 'and') ('yesterday', 'and', 'attended') ('and', 'attended', 'the') ('attended', 'the', 'classes')
4-grams
('He', 'went', 'to', 'school') ('went', 'to', 'school', 'yesterday') ('to', 'school', 'yesterday', 'and') ('school', 'yesterday', 'and', 'attended') ('yesterday', 'and', 'attended', 'the') ('and', 'attended', 'the', 'classes')
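In practice, n-grams feed frequency statistics (e.g. for simple language models). A minimal sketch counting bigrams with collections.Counter:

from collections import Counter
from nltk import ngrams

sentence = "He went to school yesterday and attended the classes"
# count how often each bigram occurs in the token stream
bigram_counts = Counter(ngrams(sentence.split(), 2))
print(bigram_counts.most_common(3))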
from nltk import pos_tag, word_tokenize
sentence = "He goes to school daily"
tokens = word_tokenize(sentence)
print(pos_tag(tokens))
Output
[('He', 'PRP'), ('goes', 'VBZ'), ('to', 'TO'), ('school', 'NN'), ('daily', 'RB')]
Tag | Meaning
---|---
PRP | pronoun, personal
VBZ | verb, present tense, 3rd person singular
TO | "to" as preposition
NN | noun, common, singular or mass
RB | adverb |
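The definition of any Penn Treebank tag can also be queried directly in NLTK (requires the 'tagsets' resource):

import nltk
nltk.download('tagsets')
# prints the definition and example words for the PRP tag
nltk.help.upenn_tagset('PRP')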
Installation
$ pip3 install spacy
$ python3 -m spacy download en_core_web_sm
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("He goes to school daily")
for token in doc:
    print(token.text, token.pos_, token.dep_)
He PRON nsubj
goes VERB ROOT
to ADP prep
school NOUN pobj
daily ADV advmod
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("He goes to school daily")
for token in doc:
    print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_,
          token.shape_, token.is_alpha, token.is_stop)
He -PRON- PRON PRP nsubj Xx True True
goes go VERB VBZ ROOT xxxx True False
to to ADP IN prep xx True True
school school NOUN NN pobj xxxx True False
daily daily ADV RB advmod xxxx True False
import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
from nltk.stem import WordNetLemmatizer
sentence = "He went to school yesterday and attended the classes"
lemmatizer = WordNetLemmatizer()
for word in sentence.split():
    print(lemmatizer.lemmatize(word), end=' ')
Output
He went to school yesterday and attended the class
Without POS information, WordNetLemmatizer treats every word as a noun, which is why only "classes" changed; the next example passes the POS tag explicitly.
from nltk.stem import WordNetLemmatizer
from nltk import word_tokenize, pos_tag
from nltk.corpus import wordnet as wn
# Check the complete list of tags http://www.nltk.org/book/ch05.html
def wntag(tag):
    if tag.startswith("J"):
        return wn.ADJ
    elif tag.startswith("R"):
        return wn.ADV
    elif tag.startswith("N"):
        return wn.NOUN
    elif tag.startswith("V"):
        return wn.VERB
    return None

lemmatizer = WordNetLemmatizer()
sentence = "I went to school today and he goes daily"
tokens = word_tokenize(sentence)
for token, tag in pos_tag(tokens):
    if wntag(tag):
        print(lemmatizer.lemmatize(token, wntag(tag)), end=' ')
    else:
        print(lemmatizer.lemmatize(token), end=' ')
Output
I go to school today and he go daily
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("I went to school today and he goes daily")
for token in doc:
    print(token.lemma_, end=' ')
-PRON- go to school today and -PRON- go daily
(-PRON- is the pronoun lemma placeholder used by spaCy v2; spaCy v3 lemmatizes pronouns to themselves.)
import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("He goes to school daily")
displacy.serve(doc, style="dep")
Installation: a larger model with word vectors
$ python3 -m spacy download en_core_web_lg
import spacy
nlp = spacy.load("en_core_web_lg")
doc1 = nlp("dog")
doc2 = nlp("cat")
doc3 = nlp("apple")
print("similarity ({},{}): {} ".format(doc1.text, doc2.text, doc1.similarity(doc2)))
print("similarity ({},{}): {} ".format(doc2.text, doc3.text, doc2.similarity(doc3)))
print("similarity ({},{}): {} ".format(doc1.text, doc3.text, doc1.similarity(doc3)))
Output
similarity (dog,cat): 0.8016854705531046
similarity (cat,apple): 0.28213841802558415
similarity (dog,apple): 0.2633902481063797
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("cat")
for token in doc:
    print(token.vector)
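en_core_web_sm has no static word vectors (token.vector falls back on context-sensitive tensors), which is why the lg model is preferred for similarity. A sketch checking whether a model provides real vectors:

import spacy

nlp = spacy.load("en_core_web_lg")
token = nlp("cat")[0]
# has_vector: the model provides a real vector; vector_norm: its L2 norm
print(token.has_vector, token.vector_norm, token.vector.shape)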
import gensim
from nltk.tokenize import sent_tokenize, word_tokenize
data = "This is a class. This is a table"
sentences = []
for sentence in sent_tokenize(data):
    words = []
    for word in word_tokenize(sentence):
        words.append(word.lower())
    sentences.append(words)

# min_count: ignore all words whose total frequency is below this value
# window: maximum distance between the current word and the predicted word in a sentence
# (in gensim >= 4.0 the parameter is vector_size; older versions call it size)
cbow = gensim.models.Word2Vec(sentences, min_count=1, vector_size=100, window=3)

# print the vector for a word
print(cbow.wv["this"])
# similarity between two words
print(cbow.wv.similarity("this", "class"))
# predict the two most likely context words
print(cbow.predict_output_word(["is"], topn=2))
# min_count: ignore all words whose total frequency is below this value
# window: maximum distance between the current word and the predicted word in a sentence
# sg: 1 for skip-gram; otherwise CBOW
sgram = gensim.models.Word2Vec(sentences, min_count=1, vector_size=100, window=5, sg=1)

# print the vector for a word
print(sgram.wv["this"])
# similarity between two words
print(sgram.wv.similarity("this", "class"))
# predict the two most likely context words
print(sgram.predict_output_word(["is"], topn=2))
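The trained KeyedVectors in model.wv also expose neighbourhood queries; on this two-sentence toy corpus the neighbours are not meaningful, but the call illustrates the API:

# nearest neighbours of a word in each embedding space
print(cbow.wv.most_similar("this", topn=2))
print(sgram.wv.most_similar("this", topn=2))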
Named entity recognition: extract named entities and assign them to specific categories.
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("Paris is the capital of France. In 2015, its population was recorded as 2,206,488")
for entity in doc.ents:
    print(entity.text, entity.start_char, entity.end_char, entity.label_)
Paris 0 5 GPE
France 24 30 GPE
2015 35 39 DATE
2,206,488 72 81 CARDINAL
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Paris is the capital of France. In 2015, its population was recorded as 2,206,488")
displacy.serve(doc, style="ent")
Tag | Meaning
---|---
GPE | Countries, cities, states
DATE | Absolute or relative dates or periods
CARDINAL | Numerals that do not fall under another type
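Any spaCy label can also be looked up programmatically:

import spacy

print(spacy.explain("GPE"))       # Countries, cities, states
print(spacy.explain("CARDINAL"))  # Numerals that do not fall under another type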
Installation
import nltk
nltk.download('vader_lexicon')
Usage
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()
sentiment = sia.polarity_scores("this movie is good")
print(sentiment)
sentiment = sia.polarity_scores("this movie is not very good")
print(sentiment)
sentiment = sia.polarity_scores("this movie is bad")
print(sentiment)
Output
{'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'compound': 0.4404}
{'neg': 0.344, 'neu': 0.656, 'pos': 0.0, 'compound': -0.3865}
{'neg': 0.538, 'neu': 0.462, 'pos': 0.0, 'compound': -0.5423}
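The compound score is usually mapped to a discrete label with the ±0.05 cut-offs recommended by the VADER authors; a minimal sketch:

from nltk.sentiment.vader import SentimentIntensityAnalyzer

sia = SentimentIntensityAnalyzer()

def label(text, threshold=0.05):
    # conventional VADER cut-offs: >= 0.05 positive, <= -0.05 negative
    compound = sia.polarity_scores(text)["compound"]
    if compound >= threshold:
        return "positive"
    if compound <= -threshold:
        return "negative"
    return "neutral"

print(label("this movie is good"))            # positive
print(label("this movie is not very good"))   # negative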
A Prolog program consists of clauses of the following form:
Head :- Body.
A clause with an empty body is called a fact.
cat(bob).
cat(alice).
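A clause with a non-empty body is a rule: the head holds whenever all the goals in the body hold. An illustrative example (animal/1 is hypothetical):

animal(X) :- cat(X).   % X is an animal if X is a cat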
On an Ubuntu machine
$ sudo apt install gprolog
$ prolog
GNU Prolog 1.4.5 (64 bits)
Compiled Feb 5 2017, 10:30:08 with gcc
By Daniel Diaz
Copyright (C) 1999-2016 Daniel Diaz
| ?- [user].
compiling user for byte code...
cat(tom).
cat(alice).
user compiled, 2 lines read - 241 bytes written, 12239 ms
(4 ms) yes
| ?-
| ?- cat(X).
X = tom ?
yes
| ?- cat(bob).
no
| ?- [user].
compiling user for byte code...
cat(tom).
cat(alice).
allcats(L) :- findall(X, cat(X), L).
user compiled, 3 lines read - 490 bytes written, 10638 ms
yes
| ?- allcats(L).
L = [tom,alice]
yes
| ?- [user].
compiling user for byte code...
friend(bob, alice).
friend(alice, kevin).
friend(bob, thomas).
friend(bob, peter).
user compiled, 4 lines read - 486 bytes written, 77256 ms
(10 ms) yes
| ?- friend(bob, X).
X = alice ? a
X = thomas
X = peter
(1 ms) yes
$ cat friend.pl
friend(bob, alice).
friend(alice, kevin).
friend(bob, thomas).
friend(bob, peter).
human(X):-friend(X,_).
human(Y):-friend(_,Y).
$ prolog --consult-file friend.pl
GNU Prolog 1.4.5 (64 bits)
Compiled Feb 23 2020, 20:14:50 with gcc
By Daniel Diaz
Copyright (C) 1999-2020 Daniel Diaz
compiling /home/user/friend.pl for byte code...
/home/user/friend.pl compiled, 4 lines read - 515 bytes written, 22 ms
| ?- friend(bob,alice).
true ?
yes
$ prolog --consult-file friend.pl
| ?- human(X).
X = bob ? a
X = alice
X = bob
X = bob
X = alice
X = kevin
X = thomas
X = peter
yes
| ?-
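Rules can also be recursive. A sketch of a transitive acquaintance relation over the friend/2 facts above (knows/2 is illustrative; this naive version would not terminate on cyclic data):

knows(X, Y) :- friend(X, Y).
knows(X, Y) :- friend(X, Z), knows(Z, Y).

% ?- knows(bob, kevin).  succeeds via friend(bob, alice), friend(alice, kevin)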