Artificial Intelligence
John Samuel
CPE Lyon
Year: 2022-2023
Email: john(dot)samuel(at)cpe(dot)fr
  from nltk.stem.porter import PorterStemmer

  words = ["words", "eating", "went", "engineer", "tried"]
  porter = PorterStemmer()
  for word in words:
    print(porter.stem(word), end=' ')
Output

  word eat went engin tri
                
  from nltk.stem.snowball import SnowballStemmer

  words = ["words", "eating", "went", "engineer", "tried"]
  snowball = SnowballStemmer("english")
  for word in words:
    print(snowball.stem(word), end=' ')
Output

  word eat went engin tri
            
                         
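Unlike Porter, the Snowball stemmer supports several languages. A minimal sketch stemming a few French words (SnowballStemmer.languages lists the supported languages; the example words are arbitrary):

  from nltk.stem.snowball import SnowballStemmer

  # List the languages supported by the Snowball stemmer
  print(SnowballStemmer.languages)

  # Stem a few French words with the French stemmer
  french = SnowballStemmer("french")
  for word in ["manger", "mangeait", "mangeront"]:
    print(french.stem(word), end=' ')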
                
  from nltk import ngrams

  sentence = "He went to school yesterday and attended the classes"

  for n in range(1, 5):
    print("\n{}-grams".format(n))
    n_grams = ngrams(sentence.split(), n)
    for ngram in n_grams:
      print(ngram, end=" ")
Output
                
1-grams
('He',) ('went',) ('to',) ('school',) ('yesterday',) ('and',) ('attended',) ('the',) ('classes',) 
2-grams
('He', 'went') ('went', 'to') ('to', 'school') ('school', 'yesterday') ('yesterday', 'and') ('and', 'attended') ('attended', 'the') ('the', 'classes') 
3-grams
('He', 'went', 'to') ('went', 'to', 'school') ('to', 'school', 'yesterday') ('school', 'yesterday', 'and') ('yesterday', 'and', 'attended') ('and', 'attended', 'the') ('attended', 'the', 'classes') 
4-grams
('He', 'went', 'to', 'school') ('went', 'to', 'school', 'yesterday') ('to', 'school', 'yesterday', 'and') ('school', 'yesterday', 'and', 'attended') ('yesterday', 'and', 'attended', 'the') ('and', 'attended', 'the', 'classes')
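
Counting n-grams is a common next step, e.g. for building language models. A minimal sketch, assuming NLTK's FreqDist for the counting and reusing the sentence above:

  from nltk import ngrams, FreqDist

  sentence = "He went to school yesterday and attended the classes"

  # Count the bigrams and show the two most frequent ones
  bigram_counts = FreqDist(ngrams(sentence.split(), 2))
  print(bigram_counts.most_common(2))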
                         
                
  from nltk import pos_tag, word_tokenize

  sentence = "He goes to school daily"

  tokens = word_tokenize(sentence)
  print(pos_tag(tokens))
Output

  [('He', 'PRP'), ('goes', 'VBZ'), ('to', 'TO'), ('school', 'NN'), ('daily', 'RB')]
| Tag | Meaning |
|---|---|
| PRP | pronoun, personal |
| VBZ | verb, present tense, 3rd person singular |
| TO | "to" as preposition |
| NN | noun, common, singular or mass |
| RB | adverb |
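
NLTK can describe these tags itself. A minimal sketch, assuming the 'tagsets' resource is available for download; nltk.help.upenn_tagset prints the definition and examples of a Penn Treebank tag:

  import nltk

  nltk.download('tagsets')

  # Look up the meaning of the PRP and VBZ tags
  nltk.help.upenn_tagset('PRP')
  nltk.help.upenn_tagset('VBZ')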
Installation

  $ pip3 install spacy

  $ python3 -m spacy download en_core_web_sm
Usage

  import spacy

  nlp = spacy.load("en_core_web_sm")
                
  import spacy

  nlp = spacy.load("en_core_web_sm")
  doc = nlp("He goes to school daily")

  for token in doc:
    print(token.text, token.pos_, token.dep_)
                
Output

  He PRON nsubj
  goes VERB ROOT
  to ADP prep
  school NOUN pobj
  daily ADV advmod
                         
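The dependency labels above (nsubj, prep, pobj, advmod) can be looked up with spacy.explain, which returns a short description of any label known to spaCy. A minimal sketch:

  import spacy

  # spacy.explain gives a human-readable description of a label
  for label in ["nsubj", "prep", "pobj", "advmod"]:
    print(label, "->", spacy.explain(label))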
                
  import spacy

  nlp = spacy.load("en_core_web_sm")
  doc = nlp("He goes to school daily")

  for token in doc:
    print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_,
      token.shape_, token.is_alpha, token.is_stop)
                
Output

  He -PRON- PRON PRP nsubj Xx True True
  goes go VERB VBZ ROOT xxxx True False
  to to ADP IN prep xx True True
  school school NOUN NN pobj xxxx True False
  daily daily ADV RB advmod xxxx True False

Note: the -PRON- lemma is spaCy v2 behaviour; spaCy v3 lemmatizes pronouns to themselves ("he").
                
  import nltk

  nltk.download('punkt')
  nltk.download('wordnet')
  nltk.download('averaged_perceptron_tagger')
                
  from nltk.stem import WordNetLemmatizer

  sentence = "He went to school yesterday and attended the classes"
  lemmatizer = WordNetLemmatizer()

  for word in sentence.split():
    print(lemmatizer.lemmatize(word), end=' ')
Output

  He went to school yesterday and attended the class

Note that "went" and "attended" are unchanged: lemmatize() assumes a noun unless a part of speech is given, which motivates the next example.
                
  from nltk.stem import WordNetLemmatizer
  from nltk import word_tokenize, pos_tag
  from nltk.corpus import wordnet as wn

  # Check the complete list of tags: http://www.nltk.org/book/ch05.html
  def wntag(tag):
    if tag.startswith("J"):
      return wn.ADJ
    elif tag.startswith("R"):
      return wn.ADV
    elif tag.startswith("N"):
      return wn.NOUN
    elif tag.startswith("V"):
      return wn.VERB
    return None
                
  lemmatizer = WordNetLemmatizer()

  sentence = "I went to school today and he goes daily"
  tokens = word_tokenize(sentence)
  for token, tag in pos_tag(tokens):
    if wntag(tag):
      print(lemmatizer.lemmatize(token, wntag(tag)), end=' ')
    else:
      print(lemmatizer.lemmatize(token), end=' ')
Output

  I go to school today and he go daily
            
                
  import spacy

  nlp = spacy.load("en_core_web_sm")
  doc = nlp("I went to school today and he goes daily")

  for token in doc:
    print(token.lemma_, end=' ')
                
Output

  -PRON- go to school today and -PRON- go daily
                
  import spacy
  from spacy import displacy

  nlp = spacy.load("en_core_web_sm")
  doc = nlp("He goes to school daily")

  displacy.serve(doc, style="dep")
                         
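displacy.serve starts a local web server. To produce a file instead, displacy.render returns the markup directly; a minimal sketch writing the dependency tree to an SVG file (the file name tree.svg is arbitrary):

  import spacy
  from spacy import displacy

  nlp = spacy.load("en_core_web_sm")
  doc = nlp("He goes to school daily")

  # render() returns the SVG markup instead of serving it over HTTP
  svg = displacy.render(doc, style="dep")
  with open("tree.svg", "w", encoding="utf-8") as f:
    f.write(svg)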
Installation: larger vector model
                
  $ python3 -m spacy download en_core_web_lg
Usage

  import spacy

  nlp = spacy.load("en_core_web_lg")
                
  import spacy

  nlp = spacy.load("en_core_web_lg")
  doc1 = nlp("dog")
  doc2 = nlp("cat")
  doc3 = nlp("apple")

  print("similarity ({},{}): {}".format(doc1.text, doc2.text, doc1.similarity(doc2)))
  print("similarity ({},{}): {}".format(doc2.text, doc3.text, doc2.similarity(doc3)))
  print("similarity ({},{}): {}".format(doc1.text, doc3.text, doc1.similarity(doc3)))
Output

  similarity (dog,cat): 0.8016854705531046
  similarity (cat,apple): 0.28213841802558415
  similarity (dog,apple): 0.2633902481063797
                         
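similarity() is not limited to single words: for multi-word documents spaCy averages the word vectors. A minimal sketch comparing two short sentences (the sentences are arbitrary):

  import spacy

  nlp = spacy.load("en_core_web_lg")
  doc1 = nlp("I like cats and dogs")
  doc2 = nlp("I love animals")

  # Document similarity: cosine similarity of the averaged word vectors
  print(doc1.similarity(doc2))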
                
  import spacy

  nlp = spacy.load("en_core_web_lg")
  doc = nlp("cat")

  for token in doc:
    print(token.vector)
                         
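To check what the model actually provides for each token, a minimal sketch inspecting has_vector, vector_norm and the vector's dimensionality (en_core_web_lg ships 300-dimensional vectors):

  import spacy

  nlp = spacy.load("en_core_web_lg")
  doc = nlp("cat")

  for token in doc:
    # has_vector: whether the model has a real vector for this token
    # vector.shape: (300,) for en_core_web_lg
    print(token.has_vector, token.vector_norm, token.vector.shape)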
                
  import gensim
  from nltk.tokenize import sent_tokenize, word_tokenize

  data = "This is a class. This is a table"

  sentences = []
  for sentence in sent_tokenize(data):
    words = []
    for word in word_tokenize(sentence):
      words.append(word.lower())
    sentences.append(words)
                
  # min_count: ignore all words whose total frequency is below this value
  # window: maximum distance between the current and predicted word within a sentence
  # vector_size (called size in gensim < 4.0): dimensionality of the word vectors
  cbow = gensim.models.Word2Vec(sentences, min_count=1, vector_size=100, window=3)

  # print the vector for a word
  print(cbow.wv["this"])

  # similarity between two words
  print(cbow.wv.similarity("this", "class"))

  # predict the two most likely context words
  print(cbow.predict_output_word(["is"], topn=2))
                
  # min_count: ignore all words whose total frequency is below this value
  # window: maximum distance between the current and predicted word within a sentence
  # sg: 1 for skip-gram; otherwise CBOW
  sgram = gensim.models.Word2Vec(sentences, min_count=1, vector_size=100, window=5, sg=1)

  # print the vector for a word
  print(sgram.wv["this"])

  # similarity between two words
  print(sgram.wv.similarity("this", "class"))

  # predict the two most likely context words
  print(sgram.predict_output_word(["is"], topn=2))
                         
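A trained model can be persisted and reloaded with gensim's save and load. A minimal sketch (the file name word2vec.model is arbitrary):

  # Save the trained model to disk and load it back
  sgram.save("word2vec.model")
  model = gensim.models.Word2Vec.load("word2vec.model")
  print(model.wv.similarity("this", "class"))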
Extract named entities and assign them to specific categories.
                
  import spacy

  nlp = spacy.load("en_core_web_sm")
  doc = nlp("Paris is the capital of France. In 2015, its population was recorded as 2,206,488")

  for entity in doc.ents:
    print(entity.text, entity.start_char, entity.end_char, entity.label_)
                
                                  Paris 0 5 GPE
            
                                  France 24 30 GPE
                                  2015 35 39 DATE
                                  2,206,488 72 81 CARDINAL
                         
                
  import spacy
  from spacy import displacy

  nlp = spacy.load("en_core_web_sm")
  doc = nlp("Paris is the capital of France. In 2015, its population was recorded as 2,206,488")

  displacy.serve(doc, style="ent")
| Tag | Meaning |
|---|---|
| GPE | Countries, cities, states |
| DATE | Absolute or relative dates or periods |
| CARDINAL | Numerals that do not fall under another type |
Installation

  import nltk

  nltk.download('vader_lexicon')
Usage

  from nltk.sentiment.vader import SentimentIntensityAnalyzer

  sia = SentimentIntensityAnalyzer()
  sentiment = sia.polarity_scores("this movie is good")
  print(sentiment)
  sentiment = sia.polarity_scores("this movie is not very good")
  print(sentiment)
  sentiment = sia.polarity_scores("this movie is bad")
  print(sentiment)
Output

  {'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'compound': 0.4404}
  {'neg': 0.344, 'neu': 0.656, 'pos': 0.0, 'compound': -0.3865}
  {'neg': 0.538, 'neu': 0.462, 'pos': 0.0, 'compound': -0.5423}
                         
            
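The compound score summarises polarity in [-1, 1]. A minimal sketch turning it into a label, assuming the common ±0.05 threshold recommended in the VADER documentation:

  from nltk.sentiment.vader import SentimentIntensityAnalyzer

  sia = SentimentIntensityAnalyzer()

  def classify(text, threshold=0.05):
    # Classify a sentence from its compound score
    compound = sia.polarity_scores(text)["compound"]
    if compound >= threshold:
      return "positive"
    elif compound <= -threshold:
      return "negative"
    return "neutral"

  print(classify("this movie is good"))   # positive
  print(classify("this movie is bad"))    # negative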
A Prolog program contains clauses of the following form.

  Head :- Body.

A clause with an empty body is called a fact.

  cat(bob).
  cat(alice).
                        
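A clause with a non-empty body is a rule: the head holds whenever the body holds. A minimal example (the animal/1 predicate is hypothetical):

  % X is an animal if X is a cat
  animal(X) :- cat(X).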
On an Ubuntu machine

  $ sudo apt install gprolog
        
$ prolog
GNU Prolog 1.4.5 (64 bits)
Compiled Feb  5 2017, 10:30:08 with gcc
By Daniel Diaz
Copyright (C) 1999-2016 Daniel Diaz
| ?- [user].
compiling user for byte code...
cat(tom).
cat(alice).
user compiled, 2 lines read - 241 bytes written, 12239 ms
(4 ms) yes
| ?- 
                        
        
?- cat(X).
X = tom ? 
yes
| ?- cat(bob).
no
                        
        
| ?- [user].                             
compiling user for byte code...
cat(tom).                           
cat(alice).                         
allcats(L) :- findall(X, cat(X), L).
user compiled, 3 lines read - 490 bytes written, 10638 ms
yes
| ?- allcats(L).                         
L = [tom,alice]
yes

findall(X, cat(X), L) collects into the list L every X for which cat(X) holds.
        
| ?- [user].              
compiling user for byte code...
friend(bob, alice).  
friend(alice, kevin).
friend(bob, thomas).                
friend(bob, peter).  
user compiled, 4 lines read - 486 bytes written, 77256 ms
(10 ms) yes
| ?- friend(bob, X).      
X = alice ? a
X = thomas
X = peter
(1 ms) yes
                        
        
			
$ cat friend.pl
friend(bob, alice).
friend(alice, kevin).
friend(bob, thomas).
friend(bob, peter).
human(X):-friend(X,_).
human(Y):-friend(_,Y).
                        
			
        
			
$ prolog --consult-file friend.pl
GNU Prolog 1.4.5 (64 bits)
Compiled Feb 23 2020, 20:14:50 with gcc
By Daniel Diaz
Copyright (C) 1999-2020 Daniel Diaz
compiling /home/user/friend.pl for byte code...
/home/user/friend.pl compiled, 4 lines read - 515 bytes written, 22 ms
| ?- friend(bob,alice).
true ?
yes
                        
			
        
			
$ prolog --consult-file friend.pl
| ?- human(X).
X = bob ? a
X = alice
X = bob
X = bob
X = alice
X = kevin
X = thomas
X = peter
yes
| ?-

bob and alice appear several times because each clause and each matching fact yields a separate solution; setof(X, human(X), L) would collect them into a sorted list without duplicates.