Artificial Intelligence
John Samuel
CPE Lyon
Year: 2021-2022
Email: john(dot)samuel(at)cpe(dot)fr
				 
			
				
from nltk.stem.porter import PorterStemmer

words = ["words", "eating", "went", "engineer", "tried"]

# The Porter stemmer strips suffixes to reduce each word to its stem
porter = PorterStemmer()
for word in words:
  print(porter.stem(word), end=' ')
                         
Output
				
				 word eat went engin tri
			
                         
				
from nltk.stem.snowball import SnowballStemmer

words = ["words", "eating", "went", "engineer", "tried"]

# Snowball ("Porter2") is a multilingual successor to the Porter stemmer
snowball = SnowballStemmer("english")
for word in words:
  print(snowball.stem(word), end=' ')
                         
Output
				
				 word eat went engin tri
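On this short word list Porter and Snowball agree, but their rules differ in places; a small side-by-side sketch (the example words are illustrative and the exact stems depend on the NLTK version):

from nltk.stem.porter import PorterStemmer
from nltk.stem.snowball import SnowballStemmer

porter = PorterStemmer()
snowball = SnowballStemmer("english")

# Adverbs ending in -ly are a common point of divergence between the two stemmers
for word in ["fairly", "sportingly", "generously"]:
  print(word, porter.stem(word), snowball.stem(word))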
			
                         
				
from nltk import ngrams

sentence = "He went to school yesterday and attended the classes"

# Generate and print all n-grams of the sentence for n = 1 to 4
for n in range(1, 5):
  print("\n{}-grams".format(n))
  n_grams = ngrams(sentence.split(), n)
  for ngram in n_grams:
    print(ngram, end=" ")
                         
Output
				
1-grams
			
('He',) ('went',) ('to',) ('school',) ('yesterday',) ('and',) ('attended',) ('the',) ('classes',) 
2-grams
('He', 'went') ('went', 'to') ('to', 'school') ('school', 'yesterday') ('yesterday', 'and') ('and', 'attended') ('attended', 'the') ('the', 'classes') 
3-grams
('He', 'went', 'to') ('went', 'to', 'school') ('to', 'school', 'yesterday') ('school', 'yesterday', 'and') ('yesterday', 'and', 'attended') ('and', 'attended', 'the') ('attended', 'the', 'classes') 
4-grams
('He', 'went', 'to', 'school') ('went', 'to', 'school', 'yesterday') ('to', 'school', 'yesterday', 'and') ('school', 'yesterday', 'and', 'attended') ('yesterday', 'and', 'attended', 'the') ('and', 'attended', 'the', 'classes')
                         
				
from nltk import pos_tag, word_tokenize

sentence = "He goes to school daily"

# Tokenize the sentence, then tag each token with its part of speech
tokens = word_tokenize(sentence)
print(pos_tag(tokens))
                         
Output
				
				[('He', 'PRP'), ('goes', 'VBZ'), ('to', 'TO'), ('school', 'NN'), ('daily', 'RB')]
                         
			
				
			
| Tag | Meaning |
|---|---|
| PRP | pronoun, personal |
| VBZ | verb, present tense, 3rd person singular |
| TO | "to" as preposition or infinitive marker |
| NN | noun, common, singular or mass |
| RB | adverb |
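NLTK can also print the official description of any Penn Treebank tag; a small sketch (it assumes the 'tagsets' resource is available for download):

import nltk
nltk.download('tagsets')

# Look up the documentation and examples for a given tag
nltk.help.upenn_tagset('PRP')
nltk.help.upenn_tagset('VBZ')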
Installation
				
				 $ pip3 install spacy
			
				 $ python3 -m spacy download en_core_web_sm
                         
Loading the model
				
import spacy

nlp = spacy.load("en_core_web_sm")
                         
				
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("He goes to school daily")

# Print each token with its part-of-speech tag and dependency relation
for token in doc:
  print(token.text, token.pos_, token.dep_)
                         
				
He PRON nsubj
goes VERB ROOT
to ADP prep
school NOUN pobj
daily ADV advmod
                         
				
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("He goes to school daily")

# Print the main linguistic attributes of each token
for token in doc:
  print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_,
        token.shape_, token.is_alpha, token.is_stop)
                         
				
He -PRON- PRON PRP nsubj Xx True True
goes go VERB VBZ ROOT xxxx True False
to to ADP IN prep xx True True
school school NOUN NN pobj xxxx True False
daily daily ADV RB advmod xxxx True False
                         
				
import nltk

nltk.download('punkt')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
                         
				
from nltk.stem import WordNetLemmatizer

sentence = "He went to school yesterday and attended the classes"
lemmatizer = WordNetLemmatizer()

# Without a POS hint, the lemmatizer treats every word as a noun
for word in sentence.split():
  print(lemmatizer.lemmatize(word), end=' ')
                         
Output
				
			     He went to school yesterday and attended the class
                         
			
				
from nltk.stem import WordNetLemmatizer
from nltk import word_tokenize, pos_tag
from nltk.corpus import wordnet as wn

# Map a Penn Treebank tag to the corresponding WordNet POS constant
# Check the complete list of tags: http://www.nltk.org/book/ch05.html
def wntag(tag):
  if tag.startswith("J"):
    return wn.ADJ
  elif tag.startswith("R"):
    return wn.ADV
  elif tag.startswith("N"):
    return wn.NOUN
  elif tag.startswith("V"):
    return wn.VERB
  return None
                         
				
lemmatizer = WordNetLemmatizer()

sentence = "I went to school today and he goes daily"
tokens = word_tokenize(sentence)

# Lemmatize with the WordNet POS when one is available, otherwise with the default (noun)
for token, tag in pos_tag(tokens):
  if wntag(tag):
    print(lemmatizer.lemmatize(token, wntag(tag)), end=' ')
  else:
    print(lemmatizer.lemmatize(token), end=' ')
                         
Output
				
			     I go to school today and he go daily
                         
			
				
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("I went to school today and he goes daily")

for token in doc:
  print(token.lemma_, end=' ')
                         
				
				  -PRON- go to school today and -PRON- go daily
			
                         
				
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("He goes to school daily")

# Start a local web server that renders the dependency parse
displacy.serve(doc, style="dep")
                         
Installation: a larger model with word vectors
				
				 $ python3 -m spacy download en_core_web_lg
			
                         
Loading the model
				
import spacy

nlp = spacy.load("en_core_web_lg")
                         
				
import spacy

nlp = spacy.load("en_core_web_lg")
doc1 = nlp("dog")
doc2 = nlp("cat")
doc3 = nlp("apple")

# Cosine similarity between the word vectors of each pair
print("similarity ({},{}): {} ".format(doc1.text, doc2.text, doc1.similarity(doc2)))
print("similarity ({},{}): {} ".format(doc2.text, doc3.text, doc2.similarity(doc3)))
print("similarity ({},{}): {} ".format(doc1.text, doc3.text, doc1.similarity(doc3)))
                         
Output
				
                                  similarity (dog,cat): 0.8016854705531046 
			
                                  similarity (cat,apple): 0.28213841802558415 
                                  similarity (dog,apple): 0.2633902481063797
                         
				
import spacy

nlp = spacy.load("en_core_web_lg")
doc = nlp("cat")

# Print the word vector of each token
for token in doc:
  print(token.vector)
                         
				
import gensim
from nltk.tokenize import sent_tokenize, word_tokenize

data = "This is a class. This is a table"

# Split the text into sentences, then into lists of lowercased tokens
sentences = []
for sentence in sent_tokenize(data):
  words = []
  for word in word_tokenize(sentence):
    words.append(word.lower())
  sentences.append(words)
                 
                         
				
# min_count: ignore all words whose total frequency is below this value
# window: maximum distance between the current and the predicted word within a sentence
# (in gensim >= 4.0 the parameter "size" is named "vector_size")
cbow = gensim.models.Word2Vec(sentences, min_count=1, vector_size=100, window=3)

# print the vector of a word
print(cbow.wv["this"])

# similarity between two words
print(cbow.wv.similarity("this", "class"))

# predict the two most probable words for a context
print(cbow.predict_output_word(["is"], topn=2))
                         
				
# min_count: ignore all words whose total frequency is below this value
# window: maximum distance between the current and the predicted word within a sentence
# sg: 1 for skip-gram; otherwise CBOW
sgram = gensim.models.Word2Vec(sentences, min_count=1, vector_size=100, window=5, sg=1)

# print the vector of a word
print(sgram.wv["this"])

# similarity between two words
print(sgram.wv.similarity("this", "class"))

# predict the two most probable words for a context
print(sgram.predict_output_word(["is"], topn=2))
                         
Extract named entities and assign them to specific categories.
				
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Paris is the capital of France. In 2015, its population was recorded as 2,206,488")

# Print each entity with its character offsets and label
for entity in doc.ents:
  print(entity.text, entity.start_char, entity.end_char, entity.label_)
                         
				
                                  Paris 0 5 GPE
			
                                  France 24 30 GPE
                                  2015 35 39 DATE
                                  2,206,488 72 81 CARDINAL
                         
				
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Paris is the capital of France. In 2015, its population was recorded as 2,206,488")

# Start a local web server that highlights the named entities
displacy.serve(doc, style="ent")
                         
| Tag | Meaning |
|---|---|
| GPE | Countries, cities, states |
| DATE | Absolute or relative dates or periods |
| CARDINAL | Numerals that do not fall under another type |
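spaCy exposes the same information programmatically through spacy.explain; a minimal sketch:

import spacy

# spacy.explain returns a short description for a label, or None if the label is unknown
for label in ["GPE", "DATE", "CARDINAL"]:
  print(label, "->", spacy.explain(label))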
Installation
				
import nltk

nltk.download('vader_lexicon')
                         
			
Usage
				
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# polarity_scores returns negative, neutral, positive and compound scores
sia = SentimentIntensityAnalyzer()
sentiment = sia.polarity_scores("this movie is good")
print(sentiment)
sentiment = sia.polarity_scores("this movie is not very good")
print(sentiment)
sentiment = sia.polarity_scores("this movie is bad")
print(sentiment)
                         
			
Output
				
{'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'compound': 0.4404}
{'neg': 0.344, 'neu': 0.656, 'pos': 0.0, 'compound': -0.3865}
{'neg': 0.538, 'neu': 0.462, 'pos': 0.0, 'compound': -0.5423}
                         
			
A Prolog program consists of clauses of the following form.
  Head :- Body.

A clause with an empty body is called a fact.
  cat(bob).
  cat(alice).
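A clause with a non-empty body is a rule: the head holds whenever the body does. A minimal sketch over the facts above (the pet/1 predicate is purely illustrative):

  % X is a pet if X is a cat
  pet(X) :- cat(X).

Querying pet(X) would then succeed with X = bob and X = alice.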
                        
On an Ubuntu machine
  $ sudo apt install gprolog
                        
		
$ prolog
GNU Prolog 1.4.5 (64 bits)
Compiled Feb  5 2017, 10:30:08 with gcc
By Daniel Diaz
Copyright (C) 1999-2016 Daniel Diaz
| ?- [user].
compiling user for byte code...
cat(tom).
cat(alice).
user compiled, 2 lines read - 241 bytes written, 12239 ms
(4 ms) yes
| ?- 
                        
		
?- cat(X).
X = tom ? 
yes
| ?- cat(bob).
no
                        
		
| ?- [user].                             
compiling user for byte code...
cat(tom).                           
cat(alice).                         
allcats(L) :- findall(X, cat(X), L).
user compiled, 3 lines read - 490 bytes written, 10638 ms
yes
| ?- allcats(L).                         
L = [tom,alice]
yes
                        
		
| ?- [user].              
compiling user for byte code...
friend(bob, alice).  
friend(alice, kevin).
friend(bob, thomas).                
friend(bob, peter).  
user compiled, 4 lines read - 486 bytes written, 77256 ms
(10 ms) yes
| ?- friend(bob, X).      
X = alice ? a
X = thomas
X = peter
(1 ms) yes
                        
		
			
$ cat friend.pl
friend(bob, alice).
friend(alice, kevin).
friend(bob, thomas).
friend(bob, peter).
human(X):-friend(X,_).
human(Y):-friend(_,Y).
                        
			
		
			
$ prolog --consult-file friend.pl
GNU Prolog 1.4.5 (64 bits)
Compiled Feb 23 2020, 20:14:50 with gcc
By Daniel Diaz
Copyright (C) 1999-2020 Daniel Diaz
compiling /home/user/friend.pl for byte code...
/home/user/friend.pl compiled, 4 lines read - 515 bytes written, 22 ms
| ?- friend(bob,alice).
true ?
yes
                        
			
		
			
$ prolog --consult-file friend.pl
| ?- human(X).
X = bob ? a
X = alice
X = bob
X = bob
X = alice
X = kevin
X = thomas
X = peter
yes
| ?-
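The query returns one solution per clause body that derives it, which is why bob and alice appear several times. Standard Prolog's setof/3 collects the distinct solutions in sorted order; a small sketch of the expected interaction:

| ?- setof(X, human(X), L).

L = [alice,bob,kevin,peter,thomas]

yes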