1 latent semantic analyser (unsupervised learning) · lsa september 19, 2018 1 latent semantic...

45
LSA September 19, 2018 1 Latent Semantic Analyser (Unsupervised Learning) 1.1 An easy way for reducing dimensionality of the data 1.1.1 Importing Libraries In [1]: import nltk import numpy as np from nltk.stem import WordNetLemmatizer from sklearn.linear_model import LogisticRegression from bs4 import BeautifulSoup import matplotlib.pyplot as plt from sklearn.decomposition import TruncatedSVD import pandas as pd 1.1.2 Reading data files and adding more stopwords In [2]: wordnet_lemmatiser = WordNetLemmatizer() titles = [line.rstrip() for line in open("all_book_titles.txt")] stop_words = set(w.rstrip() for w in open("stopwords.txt")) stop_words = stop_words.union({ 'introduction', 'edition', 'series', 'application', 'approach', 'card', 'access', 'package', 'plus', 'etext', 'brief', 'vol', 'fundamental', 'guide', 'essential', 'printed', 'third', 'second', 'fourth','and', 'the'}) 1.1.3 A custom tokeniser to process every title and extract morphene form of the words out of it In [3]: def my_tokeniser(s): s = s.lower() tokens = nltk.tokenize.word_tokenize(s) tokens = [t for t in tokens if len(t)>2] tokens = [wordnet_lemmatiser.lemmatize(t) for t in tokens] tokens = [t for t in tokens if t not in stop_words] tokens = [t for t in tokens if not any(c.isdigit() for c in t)] return tokens 1

Upload: phunghanh

Post on 17-Jun-2019

213 views

Category:

Documents


0 download

TRANSCRIPT

LSA

September 19, 2018

1 Latent Semantic Analyser (Unsupervised Learning)

1.1 An easy way for reducing dimensionality of the data

1.1.1 Importing Libraries

In [1]: import nltk

import numpy as np

from nltk.stem import WordNetLemmatizer

from sklearn.linear_model import LogisticRegression

from bs4 import BeautifulSoup

import matplotlib.pyplot as plt

from sklearn.decomposition import TruncatedSVD

import pandas as pd

1.1.2 Reading data files and adding more stopwords

# In [2]:
wordnet_lemmatiser = WordNetLemmatizer()

# Read both data files inside context managers so the file handles are
# closed as soon as the contents have been consumed.
with open("all_book_titles.txt") as titles_file:
    # One title per line; strip the trailing newline / whitespace.
    titles = [line.rstrip() for line in titles_file]

with open("stopwords.txt") as stopwords_file:
    # One stop word per line, collected into a set for fast membership tests.
    stop_words = {w.rstrip() for w in stopwords_file}

# Extra stop words added on top of the ones loaded from stopwords.txt.
stop_words = stop_words.union({
    'introduction', 'edition', 'series', 'application',
    'approach', 'card', 'access', 'package', 'plus', 'etext',
    'brief', 'vol', 'fundamental', 'guide', 'essential', 'printed',
    'third', 'second', 'fourth', 'and', 'the'})

1.1.3 A custom tokeniser to process every title and extract the base (lemma) form of its words

# In [3]:
def my_tokeniser(s):
    """Turn a string into a list of cleaned, lemmatised tokens.

    The string is lowercased and split with NLTK's word tokeniser. Tokens
    of length 2 or less are dropped, the survivors are lemmatised with
    ``wordnet_lemmatiser``, and any lemma that is in ``stop_words`` or that
    contains a digit is discarded.

    The length check happens before lemmatisation, so a returned lemma can
    itself be shorter than three characters.
    """
    words = nltk.tokenize.word_tokenize(s.lower())
    lemmas = (wordnet_lemmatiser.lemmatize(w) for w in words if len(w) > 2)

    kept = []
    for lemma in lemmas:
        if lemma in stop_words:
            continue
        if any(ch.isdigit() for ch in lemma):
            continue
        kept.append(lemma)
    return kept

1

# sample_string = "This is a test sentence to check how the custom tokeniser is working."

# my_tokens = my_tokeniser(sample_string)

# my_tokens

1.1.4 Preparing a word index mapper which maps every unique word to its own integer index

For every title, we break it into a list of tokens, and then for each token we check whether it already exists in the word-to-index mapper. If it exists we skip it; otherwise we store the word in the mapper, with its index set to the mapper's current index (which is then incremented). The word index mapper is required so that we can pass a token to it and get back the position (row) of that word in the term-title matrix, where we can assign a value to it.

# In [4]:
# Build the vocabulary: each distinct token receives the next free integer
# index, in order of first appearance across the titles.
word_index_map = {}   # token -> index
index_word_map = []   # index -> token (inverse lookup of word_index_map)
all_tokens = []       # one token list per successfully processed title
all_titles = []       # cleaned titles, kept parallel to all_tokens
current_index = 0
error_count = 0

for title in titles:
    try:
        # Drop non-ASCII characters. With errors='ignore' the encode step
        # does not raise on unencodable characters; the try block is kept
        # as a best-effort guard around cleaning and tokenising.
        title = title.encode('ascii', 'ignore').decode('utf-8')
        tokens = my_tokeniser(title)
    except Exception as e:
        print(e)
        print(title)
        error_count += 1
        continue

    # Record the title only after tokenising succeeded, so that
    # all_titles[k] always corresponds to all_tokens[k].
    all_titles.append(title)
    all_tokens.append(tokens)

    for token in tokens:
        if token not in word_index_map:
            word_index_map[token] = current_index
            current_index += 1
            index_word_map.append(token)

In [5]: word_index_map["reader"]

Out[5]: 3

In [6]: # Function to convert tokens of a given review and return a "1" at the corresponding location

def tokens_to_vector(tokens):
    """Return a 0/1 indicator vector for the given tokens.

    The vector has one slot per entry of ``word_index_map``. The slot of
    every token in ``tokens`` is set to 1; repeated tokens still yield 1.
    A token missing from ``word_index_map`` raises ``KeyError``.
    """
    vector = np.zeros(len(word_index_map))
    for token in tokens:
        vector[word_index_map[token]] = 1
    return vector

In [7]: # Preparing a D x N matrix (one row per unique token, one column per title)

2

D = len(word_index_map)  # D = number of unique tokens in the vocabulary
N = len(all_tokens)      # N = number of tokenised titles
X = np.zeros((D, N))     # zero matrix: one row per token, one column per title
i = 0                    # column counter

# In [8]:
# Fill X column by column: column i receives the 0/1 token vector of the
# i-th entry of all_tokens. enumerate() supplies the column index, so this
# cell can be re-run on its own without relying on a counter that was
# initialised in a different cell.
for i, tokens in enumerate(all_tokens):
    X[:, i] = tokens_to_vector(tokens)

In [9]: all_tokens

Out[9]: [['philosophy', 'sex', 'love', 'reader'],

['reading', 'judaism', 'christianity', 'islam'],

['microprocessor', 'principle'],

['bernhard', 'edouard', 'fernow', 'story', 'north', 'american', 'forestry'],

['encyclopedia', 'buddhism'],

['motorola', 'microprocessor', 'family', 'programming', 'interfacing'],

['american', 'anthem', 'student', 'modern', 'era'],

['read',

'literature',

'professor',

'lively',

'entertaining',

'reading',

'line'],

['mar',

'woman',

'venus',

'secret',

'sex',

'improving',

'communication',

'lasting',

'intimacy',

'fulfillment',

'giving',

'receiving',

'love',

'secret',

'passion',

'understanding',

'martian'],

['religious',

'tradition',

'world',

'journey',

'africa',

'mesoamerica',

3

'north',

'america',

'judaism',

'christianity',

'islam',

'hinduism',

'buddhism',

'china'],

['world', 'wisdom', 'sacred', 'text', 'world', 'religion'],

['illustrated', 'world', 'religion', 'wisdom', 'tradition'],

['soul', 'sex', 'cultivating', 'life', 'act', 'love'],

['thriving', 'chaos', 'handbook', 'management', 'revolution'],

['blood', 'relative'],

['wheelock', 'latin'],

['choice', 'uncertainty', 'chaos', 'luck', 'thrive', 'despite'],

['expanding', 'discourse', 'feminism', 'art', 'history'],

['relativity', 'pure', 'applied', 'physic'],

['experiment', 'microprocessor', 'interfacing', 'programming', 'hardware'],

['pathophysiology', 'concept', 'health', 'care', 'professional'],

['power', 'system', 'operation'],

['machine', 'learning'],

['e-commerce', 'strategy', 'technology'],

['real', 'complex', 'analysis'],

['schaum', 'outline', 'microprocessor'],

['schaum', 'microprocessor', 'roger', 'tokheim', 'paperback'],

['probability', 'random', 'variable', 'stochastic', 'process'],

['medical', 'microbiology', 'immunology', 'picture'],

['security', 'analysis', 'sixth', 'foreword', 'warren', 'buffett'],

['goodman',

'gilman',

'pharmacological',

'basis',

'therapeutic',

'twelfth',

'goodman',

'gilman',

'pharmacological',

'basis',

'therapeutic'],

['oca/ocp',

'oracle',

'database',

'all-in-one',

'exam',

'cd-rom',

'exam',

'oracle',

'press'],

4

['clinical',

'ethic',

'practical',

'ethical',

'decision',

'clinical',

'medicine',

'seventh',

'lange',

'clinical',

'science'],

['understand', 'linguistics', 'teach', 'yourself'],

['harrison', 'principle', 'internal', 'medicine'],

['basic', 'clinical', 'pharmacology', 'lange', 'basic', 'science'],

['harrison',

'principle',

'internal',

'medicine',

'self-assessment',

'board',

'review'],

['strategic',

'database',

'marketing',

'masterplan',

'starting',

'managing',

'profitable',

'customer-based',

'marketing',

'program'],

['review',

'medical',

'microbiology',

'immunology',

'twelfth',

'lange',

'medical',

'book'],

['tintinalli',

'emergency',

'medicine',

'manual',

'emergency',

'medicine',

'tintinalli'],

['pharmacotherapy', 'pathophysiologic'],

['review', 'medical', 'microbiology', 'immunology'],

5

['computer', 'organization'],

['risk', 'management', 'insurance'],

['design', 'analog', 'cmos', 'integrated', 'circuit'],

['plant', 'design', 'economics', 'chemical', 'engineer'],

['fluid', 'mechanic', 'engineering'],

['operating', 'system', 'spiral'],

['anatomy', 'physiology'],

['database', 'management', 'system'],

['probability',

'statistic',

'principle',

'engineering',

'computing',

'science'],

['health', 'program', 'planning', 'educational', 'ecological'],

['geographic',

'information',

'system',

'forestry',

'natural',

'resource',

'management'],

['fluid', 'mechanic', 'chemical', 'engineer'],

['adaptation', 'studying', 'film', 'literature'],

['american', 'art', 'history', 'culture'],

['evolution', 'earth'],

['woman', 'study', 'gender', 'transnational', 'world'],

['aerodynamics'],

['forensics', 'criminalistics'],

['biological', 'evolution'],

['anatomy', 'physiology', 'integrative'],

['chemical',

'engineering',

'thermodynamics',

'chemical',

'engineering',

'thermodynamics'],

['human', 'geography'],

['theater', 'experience'],

['bioinformatics', 'computing', 'perspective'],

['experience', 'sociology'],

['electronic', 'principle'],

['earth', 'earth', 'science'],

['world', 'regional', 'geography'],

['physical', 'geology'],

['stern', 'introductory', 'plant', 'biology'],

['business', 'statistic', 'communicating'],

['digital', 'electronics', 'principle'],

6

['medical', 'insurance', 'pharmacy', 'technician'],

['insurance', 'medical', 'office', 'patient', 'payment'],

['law', 'ethic', 'health', 'profession'],

['foundation', 'microbiology'],

['microbiology', 'human', 'perspective'],

['programming', 'java', 'solving'],

['modern', 'computer', 'network', 'source'],

['business', 'driven', 'information', 'system'],

['real',

'estate',

'finance',

'investment',

'real',

'estate',

'finance',

'investment'],

['human', 'anatomy'],

['hole', 'human', 'anatomy', 'physiology'],

['seeley', 'principle', 'anatomy', 'physiology'],

['anatomy', 'physiology', 'unity', 'form', 'function'],

['seeley', 'anatomy', 'physiology'],

['hole', 'human', 'anatomy', 'physiology'],

['human', 'anatomy'],

['vander', 'human', 'physiology'],

['power', 'choice', 'political', 'science'],

['history', 'western', 'art'],

['reconstructing', 'gender'],

['mechanic', 'material'],

['fluid', 'mechanic'],

['microelectronic', 'circuit', 'design'],

['electric', 'circuit'],

['cmos', 'digital', 'integrated', 'circuit', 'analysis', 'design'],

['microelectronics', 'circuit', 'analysis', 'design'],

['computer', 'organization'],

['power', 'electronics'],

['theater', 'experience'],

['finance', 'theory'],

['investment', 'mcgraw-hill/irwin', 'finance', 'insurance', 'real', 'estate'],

['corporate',

'finance',

'mcgraw-hill/irwin',

'finance',

'insurance',

'real',

'estate'],

['understanding', 'human', 'sexuality'],

['biology'],

['ecology', 'concept'],

7

['environmental', 'science'],

['narrative', 'history', 'volume'],

['differential', 'equation', 'engineer', 'scientist'],

['elementary', 'statistic'],

['film', 'history'],

['film', 'art'],

['art', 'watching', 'film'],

['studying', 'religion'],

['aerodynamics'],

['vector', 'mechanic', 'engineer', 'static', 'dynamic'],

['thermodynamics', 'engineering'],

['simulation', 'modeling', 'analysis'],

['statistical', 'technique', 'business', 'economics'],

['business', 'statistic'],

['microbiology', 'clinical'],

['prescott', 'microbiology'],

['microbiology', 'system'],

['contemporary', 'nutrition'],

['chemistry', 'molecular', 'nature', 'matter', 'change'],

['chemistry'],

['chemistry'],

['principle', 'chemistry'],

['chemistry'],

['organic', 'chemistry'],

['chemistry', 'concept'],

['organic', 'biochemistry'],

['organic', 'chemistry'],

['organic', 'biological', 'chemistry'],

['business', 'communication', 'developing', 'leader', 'networked', 'world'],

['lesikar', 'business', 'communication', 'connecting', 'digital', 'world'],

['business', 'administrative', 'communication'],

['business', 'communication', 'building', 'critical', 'skill'],

['concept', 'biology'],

['biology', 'concept', 'investigation'],

['human', 'physiology'],

['seeley', 'anatomy', 'physiology'],

['mader', 'understanding', 'human', 'anatomy', 'physiology'],

['hole', 'human', 'anatomy', 'physiology'],

['advertising',

'promotion',

'integrated',

'marketing',

'communication',

'perspective'],

['sex', 'gender'],

['unfinished', 'nation', 'concise', 'history', 'american', 'people'],

['experiencing', 'world', 'religion'],

['chemistry', 'atom'],

8

['chemistry', 'atom'],

['economics'],

['economics'],

['economics', 'mcgraw-hill', 'economics'],

['urban', 'economics'],

['environmental', 'economics', 'mcgraw-hill'],

['broadcasting',

'cable',

'internet',

'beyond',

'modern',

'electronic',

'medium'],

['pathway', 'astronomy'],

['college', 'physic', 'integrated', 'force', 'kinematics'],

['astronomy', 'journey', 'cosmic', 'frontier'],

['physic', 'everyday', 'phenomenon'],

['exploration', 'astronomy'],

['pathway', 'astronomy'],

['medical', 'insurance', 'integrated', 'claim', 'process'],

['university', 'physic', 'modern', 'physic'],

['physical', 'science'],

['physic', 'everyday', 'phenomenon'],

['theater', 'lively', 'art'],

['anthology', 'living', 'theater'],

['survey', 'operating', 'system'],

['survey', 'operating', 'system'],

['basic', 'statistic', 'business', 'economics'],

['applied', 'statistic', 'business', 'economics'],

['business',

'statistic',

'practice',

'mcgraw-hill/irwin',

'operation',

'decision',

'science'],

['business', 'research', 'method'],

['microbiology', 'system'],

['foundation', 'microbiology'],

['wardlaw', 'perspective', 'nutrition'],

['contemporary', 'world', 'regional', 'geography'],

['geography'],

['geography'],

['chemistry', 'context'],

['international', 'economics'],

['economics', 'basic', 'mcgraw-hill/irwin', 'economics'],

['labor', 'economics'],

['managerial', 'economics', 'business', 'strategy'],

9

['computer', 'network', 'top'],

['object-oriented', 'programming', 'java'],

['database', 'system', 'concept'],

['java', 'programming', 'ground'],

['algorithm'],

['earth', 'earth', 'science'],

['environmental', 'geology'],

['exploring', 'geology'],

['laboratory', 'manual', 'physical', 'geology'],

['marine', 'biology'],

['vertebrate', 'comparative', 'anatomy', 'function', 'evolution'],

['understanding', 'business'],

['employment', 'law', 'business'],

['business', 'law'],

['matching', 'supply', 'demand', 'operation', 'management'],

['molecular', 'biology'],

['human', 'biology'],

['biology'],

['biology'],

['concept', 'biology'],

['biology', 'concept', 'investigation'],

['human', 'anatomy'],

['electronic', 'medium'],

['medium', 'ethic', 'issue'],

['power', 'choice', 'political', 'science'],

['music', 'appreciation'],

['world', 'music', 'tradition', 'transformation'],

['sociology'],

['foundation', 'material', 'science', 'engineering'],

['shigley',

'mechanical',

'engineering',

'design',

'mcgraw-hill',

'mechanical',

'engineering'],

['thermodynamics', 'engineering'],

['engineering', 'circuit', 'analysis'],

['semiconductor', 'physic', 'device'],

['international', 'marketing'],

['consumer', 'behavior', 'building', 'marketing', 'strategy'],

['personal',

'finance',

'mcgraw-hill/irwin',

'finance',

'insurance',

'real',

'estate'],

10

['corporate',

'finance',

'core',

'principle',

'mcgraw-hill/irwin',

'finance',

'insurance',

'real',

'estate'],

['personal', 'finance'],

['sexuality'],

['transformation', 'woman', 'gender', 'psychology'],

['human', 'sexuality', 'self', 'society', 'culture'],

['biology'],

['biology', 'laboratory', 'manual'],

['calculus', 'transcendental', 'function'],

['applied',

'calculus',

'business',

'economics',

'social',

'life',

'science',

'expanded'],

['calculus', 'business', 'economics', 'social', 'life', 'science', 'version'],

['ecology', 'global', 'insight', 'investigation'],

['ecology', 'concept'],

['principle', 'environmental', 'science', 'inquiry'],

['film', 'art'],

['scripture', 'world', 'religion'],

['ethical',

'issue',

'modern',

'medicine',

'contemporary',

'reading',

'bioethics'],

['philosophy'],

['thermodynamics', 'heat', 'transfer', 'ee', 'software'],

['earth', 'earth', 'science'],

['unfinished',

'nation',

'concise',

'history',

'american',

'people',

'volume'],

['foundation', 'microbiology', 'basic', 'principle'],

11

['exploration', 'astronomy'],

['organic', 'chemistry'],

['thermodynamics', 'engineering', 'student', 'resource', 'dvd'],

['american', 'history', 'survey', 'volume'],

['american', 'history', 'survey', 'volume'],

['vector', 'mechanic', 'engineer', 'static'],

['vector', 'mechanic', 'engineer', 'dynamic'],

['unfinished',

'nation',

'concise',

'history',

'american',

'people',

'volume'],

['thermal-fluid', 'science', 'student', 'resource', 'dvd'],

['fluid', 'mechanic', 'student', 'dvd'],

['music', 'appreciation'],

['loose-leaf', 'financial', 'accounting'],

['foundation',

'financial',

'management',

'time',

'value',

'money',

'mcgraw-hill/irwin',

'finance',

'insurance',

'real',

'estate'],

['elementary', 'statistic', 'step', 'step', 'formula', 'data'],

['principle', 'organic', 'biochemistry', 'connect'],

['microbiology', 'system', 'connect'],

['ecology', 'global', 'insight', 'investigation', 'connect'],

['loose-leaf', 'understanding', 'business'],

['corporate',

'finance',

'alternate',

'mcgraw-hill/irwin',

'finance',

'insurance',

'real',

'estate'],

['biology'],

['loose', 'leaf', 'version', 'anatomy', 'physiology', 'integrative'],

['loose', 'leaf', 'version', 'human', 'anatomy', 'physiology'],

['loose', 'leaf', 'biology'],

['anatomy',

'physiology',

12

'unity',

'form',

'function',

'connect',

'plus/learnsmart',

'semester',

'includes',

'apr',

'phils',

'online'],

['loose-leaf', 'principle', 'corporate', 'finance'],

['experience', 'music', 'audio', 'cd'],

['loose', 'leaf', 'version', 'exploring', 'geology'],

['loose', 'leaf', 'organic', 'biochemistry'],

['loose', 'leaf', 'version', 'human', 'physiology'],

['elementary', 'statistic', 'data', 'formula'],

['electronics', 'principle', 'student', 'data', 'cd-rom'],

['biology', 'connect'],

['exploring', 'geology', 'connect'],

['music', 'appreciation', 'connect', 'upgrade'],

['international', 'business', 'challenge', 'global', 'competition'],

['intermediate', 'accounting', 'annual', 'report'],

['microbiology', 'clinical', 'connect', 'learnsmart', 'semester'],

['loose-leaf', 'organic', 'chemistry'],

['benson',

'microbiological',

'complete',

'version',

'brown',

'microbioligical'],

['film', 'art', 'connect'],

['practical',

'business',

'math',

'procedure',

'handbook',

'student',

'dvd',

'wsj',

'insert'],

['loose', 'leaf', 'cost', 'accounting', 'connect'],

['foundation', 'microbiology', 'basic', 'principle'],

['seeley', 'anatomy', 'physiology', 'connect'],

['loose', 'leaf', 'version', 'environmental', 'geology'],

['loose', 'leaf', 'version', 'environmental', 'geology'],

['exploring', 'social', 'psychology'],

['music', 'appreciation'],

['marketing'],

13

['marketing', 'marketing', 'strategy', 'planning'],

['marketing', 'management'],

['preface', 'marketing', 'management'],

['marketing'],

['personal',

'finance',

'mcgraw-hill/irwin',

'finance',

'insurance',

'real',

'est'],

['finance', 'theory'],

['study',

'finance',

'managing',

'corporate',

'value',

'creation',

'mcgraw-hill/irwin',

'finance',

'insurance',

'real',

'estate'],

['health', 'psychology'],

['international', 'accounting'],

['ethical', 'obligation', 'decision', 'accounting', 'text'],

['advanced', 'accounting'],

['advanced', 'accounting'],

['combo', 'wardlaw', 'perspective', 'nutrition', 'connect', 'semester'],

['anatomy',

'physiology',

'integrative',

'connect',

'plus/learnsmart',

'semester',

'card/apr',

'online',

'access/phils',

'online'],

['statistical', 'technique', 'business', 'economics'],

['business', 'statistic'],

['business', 'statistic'],

['nutritional', 'assessment'],

['human', 'geography', 'landscape', 'human', 'activity'],

['international', 'economics', 'mcgraw-hill', 'economics'],

['public', 'finance'],

['economics', 'mcgraw-hill', 'economics'],

['managerial',

14

'economics',

'foundation',

'business',

'analysis',

'strategy',

'mcgraw-hill',

'economics'],

['economics'],

['economics', 'principle', 'policy', 'mcgraw-hill', 'economics'],

['contemporary', 'labor', 'economics', 'mcgraw-hill', 'economics'],

['exercise', 'physiology', 'theory', 'fitness', 'performance'],

['concept', 'physical', 'fitness', 'active', 'lifestyle', 'wellness'],

['physical', 'education', 'exercise', 'science', 'sport', 'study'],

['vertebrate', 'comparative', 'anatomy', 'function', 'evolution'],

['business', 'changing', 'world'],

['law', 'business'],

['biology'],

['power', 'choice', 'political', 'science'],

['music', 'appreciation'],

['music', 'appreciation'],

['music', 'appreciation'],

['accounting', 'mean'],

['financial', 'statement', 'analysis', 'security', 'valuation'],

['managerial', 'accounting'],

['managerial', 'accounting', 'manager'],

['cost', 'accounting'],

['financial', 'accounting'],

['auditing',

'accounting',

'investigating',

'issue',

'fraud',

'professional',

'ethic'],

['advanced', 'financial', 'accounting'],

['financial', 'managerial', 'accounting'],

['criminology'],

['sociology'],

['sociology'],

['sociology', 'core'],

['mapping', 'social', 'landscape', 'reading', 'sociology'],

['sociology', 'module'],

['practical', 'skeptic', 'core', 'concept', 'sociology'],

['practical', 'skeptic', 'reading', 'sociology'],

['social',

'construction',

'difference',

'inequality',

15

'race',

'class',

'gender',

'sexuality'],

['sociology', 'matter'],

['sociological', 'theory'],

['applied', 'circuit', 'analysis'],

['design', 'operational', 'amplifier', 'analog', 'integrated', 'circuit'],

['analog', 'circuit', 'design', 'discrete', 'integrated'],

['marketing', 'management', 'strategic', 'decision-making'],

['marketing', 'research'],

['marketing', 'management'],

['marketing'],

['advertising'],

['marketing', 'core'],

['marketing', 'strategy', 'decision-focused'],

['basic', 'marketing', 'marketing', 'strategy', 'planning'],

['marketing'],

['business', 'management', 'entrepreneur', 'guidebook'],

['international', 'business'],

['global', 'marketing', 'contemporary', 'theory', 'practice'],

['entrepreneurial', 'business'],

['business',

'ethic',

'decision-making',

'personal',

'integrity',

'social',

'responsibility'],

['business', 'society', 'stakeholder', 'ethic', 'public', 'policy'],

['corporate',

'finance',

'standard',

'mcgraw-hill/irwin',

'finance',

'insurance',

'real',

'estate'],

['corporate',

'finance',

'mcgraw-hill/irwin',

'finance',

'insurance',

'real',

'esta'],

['financial',

'market',

'institution',

16

'mcgraw-hill/irwin',

'finance',

'insurance',

'real',

'estate'],

['analysis', 'financial', 'management'],

['corporate', 'finance'],

['principle',

'corporate',

'finance',

'mcgraw-hill/irwin',

'finance',

'insurance',

'real',

'estate',

'mcgraw-hill/irwin',

'finance',

'insureance',

'real',

'estate'],

['corporate', 'finance'],

['focus',

'personal',

'finance',

'active',

'help',

'you',

'develop',

'successful',

'financial',

'skill',

'mcgraw-hill/irwin',

'finance',

'insurance',

'real',

'esta'],

['core', 'concept', 'cultural', 'anthropology'],

['magic', 'witchcraft', 'religion', 'reader', 'anthropology', 'religion'],

['human', 'specie', 'biological', 'anthropology'],

['cultural', 'anthropology', 'appreciating', 'cultural', 'diversity'],

['anthropology', 'appreciating', 'human', 'diversity'],

['mirror', 'humanity', 'concise', 'cultural', 'anthropology'],

['introducing', 'anthropology', 'integrated'],

['psychology'],

['psychology', 'perspective', 'connection'],

['understanding', 'psychology'],

['understanding', 'psychology'],

['social', 'psychology'],

17

['psychological', 'testing', 'assessment', 'test', 'measurement'],

['human', 'sexuality', 'diversity', 'contemporary', 'america'],

['experience', 'psychology'],

['personality', 'psychology', 'domain', 'knowledge', 'human', 'nature'],

['abnormal', 'psychology'],

['understanding', 'human', 'sexuality'],

['science', 'psychology', 'appreciative', 'view'],

['looseleaf', 'sexuality'],

['communicating', 'strategy', 'success', 'business', 'profession'],

['basic', 'reading', 'moral', 'philosophy'],

['element', 'moral', 'philosophy'],

['experiencing',

'world',

'religion',

'loose',

'leaf',

'tradition',

'challenge',

'change'],

['business', 'ethic'],

['philosophy', 'power', 'idea'],

['doe', 'center', 'hold', 'western', 'philosophy'],

['moral', 'story', 'ethic'],

['physical', 'education', 'fitness', 'sport'],

['environmental', 'geology'],

['laboratory', 'manual', 'physical', 'geology'],

['biology'],

['managerial', 'accounting'],

['experiencing', 'race', 'class', 'gender', 'united'],

['meaning',

'difference',

'american',

'construction',

'race',

'sex',

'gender',

'social',

'class',

'sexual',

'orientation',

'disability'],

['service', 'marketing'],

['global', 'business'],

['business', 'government', 'society', 'managerial', 'perspective'],

['culture', 'sketch', 'study', 'anthropology'],

['scripture', 'world', 'religion'],

['elementary', 'statistic', 'step', 'step', 'formula'],

['forensic', 'accounting'],

18

['world', 'history', 'texas'],

['business', 'management', 'real-world', 'connection'],

['bscs', 'biology', 'molecular'],

['aerodynamics', 'engineering', 'student', 'sixth'],

['real', 'analysis', 'workbook', 'solution'],

['principle', 'real', 'analysis'],

['probability', 'measure', 'theory'],

['computer',

'organization',

'design',

'companion',

'morgan',

'kaufmann',

'computer',

'architecture',

'design'],

['probability', 'statistic', 'engineer', 'scientist'],

['computer', 'organization', 'design', 'hardware/software', 'interface'],

['digital',

'evidence',

'computer',

'crime',

'forensic',

'science',

'computer',

'internet'],

['computer', 'organization', 'design'],

['advanced', 'topic', 'forensic', 'dna', 'typing'],

['computer',

'organization',

'design',

'revised',

'hardware/software',

'interface',

'morgan',

'kaufmann',

'computer',

'architecture',

'design'],

['data',

'mining',

'practical',

'machine',

'learning',

'tool',

'technique',

'morgan',

'kaufmann',

19

'data',

'management',

'system'],

['forensic', 'science'],

['probability', 'model', 'tenth'],

['molecular', 'biology', 'understanding', 'genetic', 'revolution'],

['differential', 'equation', 'dynamical', 'system', 'chaos'],

['security', 'ninth'],

['computer', 'network', 'system'],

['molecular', 'biology', 'technique', 'classroom', 'laboratory', 'manual'],

['forestry', 'natural', 'resource'],

['computer', 'machine', 'vision', 'theory', 'algorithm', 'practicality'],

['probability', 'random', 'process', 'signal', 'processing', 'communication'],

['course', 'real', 'analysis'],

['computer',

'organization',

'design',

'fifth',

'hardware/software',

'interface',

'morgan',

'kaufmann',

'computer',

'architecture',

'design'],

['probability', 'model', 'eleventh'],

['homeland', 'security', 'principle', 'all-hazards', 'response'],

['semi-riemannian', 'geometry', 'relativity'],

['electronic', 'commerce', 'managerial', 'perspective'],

['microprocessor', 'hardware', 'software', 'principle'],

['soul', 'cinema', 'appreciation', 'film', 'music'],

['computer', 'organization', 'architecture', 'designing', 'performance'],

['microprocessor', 'programming', 'interfacing', 'software', 'hardware'],

['real', 'analysis'],

['differential', 'equation', 'linear', 'algebra'],

['musical', 'theater', 'appreciation'],

['microprocessor', 'microcomputer', 'hardware', 'software'],

['literature', 'latin', 'america', 'antiquity'],

['applied',

'partial',

'differential',

'equation',

'fourier',

'boundary',

'value'],

['wildlife', 'ecology', 'management'],

['forest',

'ecology',

20

'foundation',

'sustainable',

'forest',

'management',

'environmental',

'ethic',

'forestry'],

['digital+microprocessor'],

['data', 'structure', 'c++', 'using', 'stl'],

['digital', 'integrated', 'circuit', 'design', 'perspective'],

['microprocessor', 'programming', 'interfacing', 'software', 'hardware'],

['speech',

'language',

'processing',

'natural',

'language',

'processing',

'computational',

'linguistics',

'speech',

'recognition'],

['structured', 'computer', 'organization'],

['power', 'electronics', 'circuit', 'device'],

['programming', 'language', 'ansi', 'version'],

['quantum', 'mechanic'],

['digital',

'electronics',

'laboratory',

'experiment',

'using',

'xilinx',

'cpld',

'xilinx',

'foundation',

'design',

'simulation',

'software',

'design',

'simulation',

'software'],

['thermodynamics', 'heat', 'power'],

['forensic', 'science', 'crime', 'scene', 'crime', 'lab'],

['risk', 'management', 'insurance', 'prentice', 'hall', 'finance'],

['concept', 'programming', 'language'],

['parallel',

'programming',

'technique',

'using',

21

'networked',

'workstation',

'parallel',

'computer'],

['adts', 'data', 'structure', 'solving', 'c++'],

['operating', 'system', 'design', 'implementation'],

['differential', 'equation'],

['real', 'analysis'],

['business',

'law',

'legal',

'e-commerce',

'ethical',

'global',

'environment'],

['probability', 'stochastic', 'process'],

['probability', 'random', 'process'],

['circuit', 'design', 'theory'],

['partial', 'differential', 'equation', 'fourier', 'boundary', 'value'],

['structured', 'computer', 'organization'],

['principle', 'computer', 'organization', 'assembly', 'language'],

['physic', 'scientist', 'engineer', 'modern', 'physic'],

['computing', 'programming', 'java', 'multimedia'],

['solid', 'electronic', 'device'],

['sacred', 'path', 'understanding', 'religion', 'world'],

['motorola',

'microprocessor',

'family',

'assembly',

'language',

'interface',

'design',

'system',

'design'],

['linguistic', 'perspective', 'language', 'education'],

['principle', 'data', 'network', 'computer', 'communication'],

['listening', 'music'],

['cognition', 'psychology', 'mind', 'brain'],

['history', 'eastern', 'art'],

['testament', 'greek', 'beginner'],

['differential', 'equation', 'linear', 'algebra'],

['database', 'system'],

['political', 'science'],

['calculus', 'graphical', 'numerical', 'algebraic'],

['marketing', 'management'],

['cost', 'accounting'],

['computer', 'network'],

['mechanical', 'vibration'],

22

['programming',

'using',

'visual',

'basic',

'pearson',

'custom',

'computer',

'science'],

['data', 'structure', 'object', 'using', 'c++'],

['database', 'processing'],

['electronic', 'commerce', 'managerial', 'social', 'network', 'perspective'],

['international', 'economics', 'theory', 'policy'],

['strategic',

'management',

'business',

'policy',

'achieving',

'sustainability'],

['crisis',

'intervention',

'criminal',

'justice',

'response',

'chaos',

'mayhem',

'disorder'],

['java', 'solving', 'programming', 'myprogramminglab'],

['balanced', 'computer', 'science'],

['reinforced', 'concrete', 'mechanic', 'design'],

['marketing', 'real', 'people', 'real', 'choice'],

['marketing', 'defined', 'explained', 'applied'],

['technique', 'microbiology', 'student', 'handbook'],

['aerodynamics', 'engineer'],

['physic', 'scientist', 'engineer', 'modern', 'physic'],

['physic', 'scientist', 'engineer', 'chap'],

['operating', 'system', 'internals', 'design', 'principle'],

['design', 'analysis', 'algorithm'],

['forensic', 'psychology'],

['security', 'computing'],

['international',

'monetary',

'financial',

'economics',

'pearson',

'economics'],

['framework', 'marketing', 'management'],

['electronic', 'device', 'conventional', 'current', 'version'],

['accounting', 'information', 'system'],

23

['cost', 'accounting', 'student', 'value'],

['advanced', 'accounting'],

['computer', 'science', 'overview'],

['java', 'program', 'late', 'object'],

['java', 'program'],

['database', 'system', 'database', 'data', 'warehouse'],

['data', 'structure', 'object', 'using', 'java'],

['data', 'structure', 'algorithm', 'analysis', 'java'],

['neuro-fuzzy',

'soft',

'computing',

'computational',

'learning',

'machine',

'intelligence'],

['educational', 'psychology'],

['convergence',

'race',

'ethnicity',

'gender',

'multiple',

'identity',

'counseling',

'merrill',

'counseling'],

['electronic',

'health',

'record',

'understanding',

'using',

'computerized',

'medical',

'record'],

['electronic', 'device', 'circuit', 'theory'],

['modern', 'database', 'management'],

['hinduism'],

['contemporary',

'business',

'online',

'commerce',

'law',

'legal',

'internet',

'ethical',

'global',

'environment'],

['programming', 'world', 'wide', 'web'],

['computer', 'forensics', 'cyber', 'crime'],

24

['chemical',

'engineering',

'thermodynamics',

'prentice',

'hall',

'international',

'physical',

'chemical',

'engineering',

'science'],

['global', 'marketing'],

['e-commerce'],

['economics',

'money',

'banking',

'financial',

'market',

'business',

'school',

'pearson',

'economics'],

['database', 'concept'],

['business', 'data', 'network', 'security'],

['multinational', 'business', 'finance', 'pearson', 'finance'],

['bond', 'market', 'analysis', 'strategy'],

['marketing'],

['statistic', 'business', 'economics'],

['strategic', 'marketing'],

['management', 'science'],

['career',

'fitness',

'program',

'exercising',

'option',

'career',

'fitness',

'program',

'exercising',

'option',

'sukiennik',

'diane',

'author',

'paperback'],

['economics', 'money', 'banking', 'financial', 'market'],

['economics', 'health', 'health', 'care'],

['computer', 'security'],

['marketing', 'hospitality', 'tourism'],

['analyzing',

25

'computer',

'security',

'threat',

'vulnerability',

'countermeasure'],

['starting', 'programming', 'logic', 'design'],

['pharmacology', 'connection', 'nursing', 'practice'],

['speed', 'signaling', 'jitter', 'modeling', 'analysis', 'budgeting'],

['absolute', 'java'],

['aerodynamics', 'engineer'],

['data', 'structure', 'algorithm', 'analysis', 'c++'],

['ethic', 'information', 'age'],

['starting', 'java', 'control', 'structure', 'object'],

['computer', 'networking', 'top-down'],

['legal', 'environment', 'business', 'online', 'commerce'],

['business', 'law'],

['hospital',

'operation',

'principle',

'efficiency',

'health',

'care',

'press',

'operation',

'management'],

['engineering', 'mechanic', 'dynamic'],

['engineering', 'mechanic', 'static'],

['structured', 'computer', 'organization'],

['computing', 'programming', 'python'],

['computer',

'organization',

'architecture',

'william',

'stalling',

'book',

'computer',

'data',

'communication'],

['java', 'programming', 'comprehensive', 'version'],

['economics', 'micro', 'view'],

['international', 'economics', 'pearson', 'economics'],

['economics', 'principle', 'tool'],

['e-marketing'],

['criminology', 'justice'],

['business', 'communication'],

['business', 'communication'],

['criminological', 'theory'],

['starting',

26

'java',

'control',

'structure',

'object',

'myprogramminglab',

'pearson'],

['program'],

['international', 'business', 'reality'],

['corporate', 'finance'],

['principle', 'risk', 'management', 'insurance', 'pearson', 'finance'],

['future', 'option', 'market'],

['foundation', 'finance', 'pearson', 'finance'],

['engineering', 'mechanic', 'static', 'masteringengineering', 'pearson'],

['economics'],

['managerial', 'economics'],

['business', 'statistic'],

['principle', 'economics'],

['business',

'data',

'communications-',

'infrastructure',

'networking',

'security'],

['e-commerce'],

['pharmacology', 'nurse', 'pathophysiologic'],

['database', 'processing', 'design', 'implementation'],

['statistic', 'manager', 'using', 'microsoft', 'excel'],

['international', 'business', 'challenge', 'globalization'],

['marketing', 'research'],

['principle', 'marketing'],

['corporate', 'finance', 'core'],

['comprehensive',

'health',

'insurance',

'billing',

'coding',

'reimbursement',

'student',

'workbook'],

['java', 'software', 'structure', 'designing', 'using', 'data', 'structure'],

['mechanic', 'material'],

['neural', 'network', 'architecture', 'algorithm'],

['cryptography', 'network', 'security', 'principle', 'practice'],

['network', 'security', 'standard'],

['java', 'foundation'],

['criminology', 'integrative'],

['e-commerce'],

['computer', 'network', 'internet'],

27

['prentice', 'hall', 'earth', 'science'],

['computer', 'science', 'overview'],

['sociology'],

['operating', 'system', 'internals', 'design', 'principle'],

['history', 'dance', 'art', 'education'],

['hinduism', 'cultural', 'perspective'],

['wildlife',

'forest',

'forestry',

'principle',

'managing',

'forest',

'biological',

'diversity'],

['intel', 'microprocessor'],

['technical', 'communication', 'twenty-first', 'century'],

['criminalistics', 'forensic', 'science'],

['electronics', 'circuit', 'device'],

['principle', 'electric', 'circuit', 'conventional', 'current', 'version'],

['core', 'concept', 'pharmacology', 'mynursingkit'],

['contemporary',

'clinical',

'immunology',

'serology',

'pearson',

'clinical',

'laboratory',

'science'],

['criminology', 'sociological', 'understanding'],

['homeland', 'security'],

['health', 'physical', 'assessment', 'nursing'],

['understanding', 'pharmacology', 'health', 'profession'],

['doe', 'earth', 'physical', 'geology', 'process', 'science'],

['differential', 'equation', 'computing', 'modeling'],

['elementary', 'differential', 'equation', 'boundary', 'value'],

['course', 'database', 'system'],

['e-commerce', 'business', 'technology', 'society'],

['modern', 'operating', 'system'],

['industrial', 'ecology', 'sustainable', 'engineering'],

['contemporary', 'business', 'online', 'commerce', 'law'],

['earth', 'science'],

['christian', 'theological', 'tradition'],

['course', 'probability'],

['managing', 'business', 'process', 'flow'],

['motorola',

'microprocessor',

'family',

'assembly',

28

'language',

'interface',

'design',

'system',

'design'],

['differential', 'equation', 'linear', 'algebra'],

['art', 'history'],

['solving', 'data', 'structure', 'using', 'java', 'multimedia'],

['computer', 'network', 'internet'],

['history', 'modern', 'art'],

['introductory',

'chemical',

'engineering',

'thermodynamics',

'prentice',

'hall',

'international',

'physical',

'chemical',

'engineering',

'science'],

['hcs', 'microprocessor'],

['computer', 'organization', 'architecture', 'designing', 'performance'],

['marketing', 'management'],

['computer', 'networking', 'top-down'],

['marketing', 'research', 'applied', 'orientation'],

['contemporary', 'direct', 'interactive', 'marketing'],

['database', 'system'],

['electronic', 'commerce'],

['data', 'structure', 'abstraction', 'java'],

['service', 'marketing'],

['electronic', 'commerce', 'spectrum', 'book'],

['electric', 'circuit', 'masteringengineering'],

['principle',

'risk',

'management',

'insurance',

'prentice',

'hall',

'finance'],

['history', 'art', 'africa'],

['physic', 'scientist', 'engineer', 'modern', 'physic'],

['network', 'flow', 'theory', 'algorithm'],

['natural', 'language', 'processing', 'prolog', 'programmer'],

['educational', 'psychology', 'theory', 'practice', 'myeducationlab'],

['mastering',

'method',

'differentiated',

29

'instruction',

'culturally',

'linguistically',

'diverse',

'cld',

'student',

'myeducationkit'],

['criminology', 'integrative', 'mycrimekit'],

['core', 'java', 'volume', 'sun', 'core'],

['introductory', 'circuit', 'analysis'],

['linguistics', 'non-linguists', 'primer', 'exercise'],

['esol',

'teacher',

'classroom',

'teacher',

'linguistically',

'culturally',

'ethnically',

'diverse',

'student'],

['quantum',

'mechanic',

'engineering',

'material',

'science',

'applied',

'physic'],

['e-commerce'],

['international', 'economics', 'theory', 'policy', 'student', 'value'],

['islam', 'mysearchlab', 'religion'],

['life', 'health', 'insurance'],

['quantum', 'mechanic', 'chemistry'],

['molecular', 'thermodynamics', 'fluid-phase', 'equilibrium'],

['origin', 'virtue', 'human', 'instinct', 'evolution', 'cooperation'],

['rise', 'fall', 'athens', 'nine', 'greek', 'life'],

['evolution', 'true'],

['dramaturgy', 'american', 'theater', 'source', 'book', 'source', 'book'],

['basic', 'judaism'],

['regional',

'farm',

'bill',

'field',

'hearing',

'redmond',

'oregon',

'hearing',

'committee',

'agriculture',

30

'nutrition',

'forestry',

'united',

'senate',

'hundred',

'ninth',

'congress',

'session',

'august'],

['analytical',

'mechanic',

'relativity',

'quantum',

'mechanic',

'oxford',

'graduate',

'text'],

['hinduism', 'short'],

['judaism', 'short'],

['foundation', 'buddhism'],

['history', 'practice', 'ancient', 'astronomy'],

['oxford', 'history', 'islam'],

['testament', 'reader', 'christianity'],

['classical', 'greek', 'reader'],

['challenge', 'ethic', 'environmentalism', 'feminism', 'multiculturalism'],

['game', 'theory'],

['islamic', 'threat', 'myth', 'reality'],

['anne', 'orthwood', 'bastard', 'sex', 'law', 'virginia'],

['athenaze', 'ancient', 'greek'],

['theorizing', 'feminism', 'reader'],

['testament', 'christian', 'writing', 'reader'],

['eight', 'theory', 'religion'],

['islam', 'straight', 'path'],

['playing', 'real', 'text', 'game', 'theory'],

['oxford', 'encyclopedia', 'islamic', 'world', 'six-volume', 'set'],

['buddhism'],

['doe', 'feminism', 'discriminate', 'debate'],

['game', 'theory', 'international'],

['microelectronic',

'circuit',

'oxford',

'electrical',

'computer',

'engineering'],

['film', 'theory', 'criticism'],

['magic', 'witchcraft', 'ghost', 'greek', 'roman', 'world', 'sourcebook'],

['vold', 'theoretical', 'criminology'],

['democracy',

31

'latin',

'america',

'political',

'change',

'comparative',

'perspective'],

['anthropology', 'doe', 'mean', 'human'],

['wave', 'oscillation', 'prelude', 'quantum', 'mechanic'],

['islam', 'straight', 'path'],

['conversation', 'historical', 'philosophy'],

['oxford', 'handbook', 'judaism', 'economics'],

['english', 'language', 'linguistic', 'history'],

['concise', 'world', 'religion'],

['chinese', 'south-east', 'asian', 'white', 'ware', 'found', 'philippine'],

['role',

'government',

'east',

'asian',

'economic',

'development',

'comparative',

'institutional',

'analysis'],

['chaos', 'nonlinear', 'dynamic', 'scientist', 'engineer'],

['chaos', 'time-series', 'analysis'],

['proboscidea',

'evolution',

'palaeoecology',

'elephant',

'relative',

'oxford',

'science',

'publication'],

['probability', 'random', 'process'],

['introducing', 'einstein', 'relativity'],

['dictionary', 'hinduism'],

['latin', 'dictionary', 'founded', 'andrew', 'freud', 'latin', 'dictionary'],

['greek-english', 'lexicon'],

['deuteronomy', 'judaean', 'diaspora'],

['feminism', 'film'],

['intermediate', 'greek-english', 'lexicon'],

['biochemistry', 'molecular', 'biology'],

['building', 'bioinformatics', 'solution', 'perl', 'mysql'],

['relativity', 'einstein', 'equation'],

['feminism', 'issue', 'argument'],

['canon', 'criterion', 'christian', 'theology', 'father', 'feminism'],

['evolution'],

['woman', 'indian', 'buddhism', 'comparative', 'textual', 'study'],

32

['cultural', 'anthropology', 'perspective', 'human', 'condition'],

['molecular', 'quantum', 'mechanic'],

['molecular', 'biology', 'principle', 'genome', 'function'],

['relativity',

'gravitation',

'cosmology',

'basic',

'oxford',

'master',

'physic'],

['molecular', 'biology', 'cancer', 'mechanism', 'target', 'therapeutic'],

['einstein',

'physic',

'atom',

'quantum',

'relativity',

'derived',

'explained',

'appraised'],

['medicinal', 'chemistry'],

['biochemistry', 'molecular', 'basis', 'life'],

['ecoimmunology'],

['invitation', 'world', 'religion'],

['gender', 'prism', 'difference'],

['testament', 'historical', 'christian', 'writing'],

['world', 'religion'],

['introducing', 'philosophy', 'text', 'integrated', 'reading'],

['crime', 'criminology'],

['ethic', 'history', 'theory', 'contemporary', 'issue'],

['exploring', 'philosophy', 'introductory', 'anthology'],

['philosophy', 'classical', 'contemporary', 'reading'],

['criminological', 'theory', 'evaluation'],

['christ',

'messiah',

'christ',

'language',

'paul',

'messiah',

'language',

'ancient',

'judaism'],

['talon', 'eagle', 'latin', 'america', 'united', 'world'],

['marine', 'biology', 'function', 'biodiversity', 'ecology'],

['american', 'popular', 'music'],

['physical', 'geography', 'global', 'environment'],

['buddhism', 'introducing', 'buddhist', 'experience'],

['oxford', 'latin', 'course', 'college', 'grammar', 'exercise', 'context'],

['oxford', 'latin', 'course', 'college', 'reading', 'vocabulary'],

33

['colonial', 'latin', 'america'],

['gendered', 'society'],

['gendered', 'society', 'reader'],

['josephus', 'theology', 'ancient', 'judaism'],

['modern', 'latin', 'america'],

['moral', 'life', 'introductory', 'reader', 'ethic', 'literature'],

['reading', 'greek', 'history', 'source', 'interpretation'],

['philosophy', 'quest', 'truth'],

['advanced', 'quantum', 'mechanic'],

['mechanic', 'thermodynamics', 'propulsion'],

['genetic', 'algorithm', 'search', 'optimization', 'machine', 'learning'],

['exploring', 'black', 'hole', 'relativity'],

['real', 'analysis', 'course'],

['modern', 'quantum', 'mechanic'],

['nonlinear', 'dynamics+chaos'],

['concrete', 'mathematics', 'foundation', 'computer', 'science'],

['computer', 'system', 'organization', 'architecture'],

['design', 'implementation', 'freebsd', 'operating', 'system'],

['course', 'probability'],

['art', 'computer', 'programming', 'volume', 'algorithm'],

['literature', 'reading', 'writing'],

['agenda',

'alternative',

'public',

'policy',

'update',

'epilogue',

'health',

'care',

'longman',

'classic',

'political',

'science'],

['greek', 'art', 'archaeology'],

['gender', 'psychological', 'perspective'],

['human', 'sexuality', 'mydevelopmentlab'],

['art', 'history'],

['business', 'ethic', 'concept'],

['jew', 'christian', 'muslim', 'comparative', 'monotheistic', 'religion'],

['medium', 'ethic', 'moral', 'reasoning'],

['english', 'linguistic'],

['bible'],

['concise', 'linguistics'],

['latin', 'america', 'people', 'combined', 'volume'],

['short', 'writing', 'biology'],

['vision', 'america', 'history', 'united', 'volume'],

['sociology', 'project', 'introducing', 'sociological', 'imagination'],

['sociology', 'down-to-earth'],

34

['sociology'],

['biological', 'anthropology'],

['backpack', 'literature', 'fiction', 'poetry', 'drama', 'writing'],

['religion', 'world'],

['history', 'world', 'religion'],

['psychology'],

['social', 'psychology', 'united'],

['core', 'question', 'philosophy', 'text', 'reading', 'mythinkinglab'],

['forensic', 'psychology'],

['human', 'sexuality'],

['human', 'sexuality', 'paper'],

['living', 'religion'],

['literature', 'fiction', 'poetry', 'drama', 'writing'],

['human', 'evolution', 'culture', 'highlight', 'anthropology'],

['conformity',

'conflict',

'reading',

'cultural',

'anthropology',

'myanthrolab'],

['short', 'writing', 'film'],

['art', 'history', 'myartslab'],

['physiology', 'behavior'],

['janson', 'basic', 'history', 'western', 'art', 'history', 'art'],

['statistic', 'psychology'],

['history', 'modern', 'art', 'paperback'],

['history', 'modern', 'art', 'volume'],

['human',

'biology',

'social',

'worker',

'development',

'ecology',

'genetics',

'health'],

['anthropology', 'latin', 'america', 'caribbean'],

['understanding', 'music'],

['head', 'broadcasting', 'america', 'survey', 'electronic', 'medium'],

['writing', 'political', 'science', 'practical'],

['history', 'film'],

['religion', 'matter', 'sociology', 'teach', 'religion', 'world'],

['literature'],

['visual', 'art', 'history', 'revised'],

['janson', 'history', 'art', 'western', 'tradition'],

['janson', 'history', 'art', 'western', 'tradition', 'volume', 'myartslab'],

['janson', 'history', 'art', 'western', 'tradition', 'volume'],

['flashback', 'film', 'history'],

['greek', 'history', 'culture', 'society'],

35

['history',

'italian',

'renaissance',

'art',

'paper',

'cover',

'mysearchlab',

'art'],

['photography',

'cultural',

'history',

'mysearchlab',

'art',

'mysearchlab',

'art'],

['latin', 'america', 'interpretive', 'history'],

['world', 'history'],

['consider',

'source',

'document',

'latin',

'american',

'history',

'interpretive',

'history'],

['sociology', 'religion', 'reader'],

['anthropology', 'religion', 'magic', 'witchcraft'],

['ultimate', 'question', 'thinking', 'philosophy'],

['seeing',

'ourselves',

'classic',

'contemporary',

'cross-cultural',

'reading',

'sociology'],

['anthropology', 'myanthrolab'],

['art', 'history', 'volume', 'myartslab'],

['art', 'history', 'volume', 'myartslab'],

['art', 'history', 'combined', 'myartslab'],

['woman',

'politics',

'american',

'society',

'longman',

'classic',

'political',

'science'],

['political', 'science'],

36

['look', 'art', 'history'],

['film', 'critical', 'mycommunicationkit'],

['sociology'],

['human', 'sexuality'],

['world', 'mysearchlab', 'political', 'science'],

['cultural', 'anthropology', 'globalizing', 'world'],

['forensic', 'anthropology'],

['empirical', 'political', 'analysis', 'mysearchlab', 'political', 'science'],

['challenge', 'world', 'development', 'mysearchlab', 'political', 'science'],

['janson', 'history', 'art', 'western', 'tradition', 'myartslab', 'pearson'],

['social', 'psychology'],

['statistic', 'behavioral', 'social', 'science'],

['sociology', 'education'],

['conscious', 'reader'],

['genderspeak', 'personal', 'effectiveness', 'gender', 'communication'],

['political', 'science', 'student', 'writer', 'manual'],

['thinking', 'woman', 'sociological', 'perspective', 'sex', 'gender'],

['understanding', 'psychology'],

['understanding',

'political',

'world',

'comparative',

'political',

'science',

'mypoliscilab'],

['psychology', 'life'],

['art', 'history'],

['art', 'history', 'volume'],

['art', 'history', 'portable', 'book'],

['art', 'history', 'portable', 'book'],

['art', 'history', 'portable', 'book'],

['art', 'history', 'portable', 'book'],

['art', 'history', 'volume'],

['religion', 'world', 'myreligionlab'],

['art', 'beyond', 'west'],

['sociology', 'down-to-earth', 'mysoclab'],

['sociology', 'down-to-earth'],

['thinking', 'woman', 'sociological', 'perspective', 'sex', 'gender'],

['literature', 'writing', 'process'],

['exploring', 'biological', 'anthropology'],

['human', 'sexuality', 'world', 'diversity'],

['world', 'religion'],

['forty', 'study', 'changed', 'psychology'],

['human',

'evolution',

'culture',

'highlight',

'anthropology',

37

'myanthrolab',

'pearson'],

['music', 'sight', 'singing'],

['art', 'history', 'volume', 'book', 'carte'],

['art', 'history', 'volume', 'book', 'carte', 'myartslab'],

['abnormal', 'psychology'],

['art', 'history', 'volume', 'myartslab'],

['art', 'history', 'myartslab'],

['human', 'sexuality', 'world', 'diversity', 'paperback'],

['living', 'religion'],

['living', 'religion', 'myreligionlab', 'pearson'],

['mastering', 'world', 'psychology'],

['political', 'science'],

['political', 'science', 'mypoliscilab', 'pearson'],

['short', 'course', 'photography', 'film', 'darkroom'],

['sociology'],

['human', 'sexuality'],

['sociology', 'down-to-earth'],

['greek-english', 'lexicon', 'testament', 'christian', 'literature'],

['greek', 'tragedy'],

['prediction', 'profiling', 'policing', 'punishing', 'actuarial', 'age'],

['travesti',

'sex',

'gender',

'culture',

'brazilian',

'transgendered',

'prostitute'],

['cult', 'territory', 'origin', 'greek', 'city-state'],

['disability',

'judaism',

'christianity',

'islam',

'sacred',

'text',

'historical',

'tradition',

'social',

'analysis'],

['politics',

'passion',

'woman',

'sexual',

'culture',

'afro-surinamese',

'diaspora'],

['source', 'east', 'asian', 'tradition'],

['source', 'east', 'asian', 'tradition', 'modern', 'period'],

38

['source', 'east', 'asian', 'tradition'],

['jewishness', 'critique', 'zionism', 'direction', 'critical', 'theory'],

['islam', 'america'],

['voice', 'vision', 'creative', 'narrative', 'film', 'production'],

['carlos', 'aldama', 'life', 'bat', 'cuba', 'diaspora', 'drum'],

['cognitive',

'model',

'speech',

'processing',

'psycholinguistic',

'computational',

'perspective',

'acl-mit',

'press',

'natural',

'language',

'processing'],

['machine', 'learning', 'adaptive', 'computation', 'machine', 'learning'],

['process', 'life', 'molecular', 'biology'],

['probabilistic',

'graphical',

'model',

'principle',

'technique',

'adaptive',

'computation',

'machine',

'learning'],

['circuit', 'design', 'simulation', 'vhdl'],

['machine',

'learning',

'probabilistic',

'perspective',

'adaptive',

'computation',

'machine',

'learning'],

['foundation',

'machine',

'learning',

'adaptive',

'computation',

'machine',

'learning'],

['game', 'theory', 'social', 'contract', 'playing'],

['algorithm'],

['strategy', 'game', 'theory', 'practice'],

['digital',

39

'performance',

'history',

'medium',

'theater',

'dance',

'performance',

'art',

'installation'],

['game', 'theory'],

['bioinformatics', 'algorithm'],

['foundation', 'statistical', 'natural', 'language', 'processing'],

['learning',

'kernel',

'support',

'vector',

'machine',

'regularization',

'optimization',

'beyond'],

['linguistics', 'sixth', 'language', 'communication'],

['exile', 'diaspora', 'stranger'],

['e-business', 'e-commerce', 'management'],

['computer', 'networking', 'james', 'kurose', 'keith', 'ross'],

['computer',

'organization',

'architecture',

'designing',

'performance',

'william',

'stalling'],

['defeat', 'bad', 'news', 'rwanda', 'musinga', 'africa', 'diaspora'],

['theater', 'technology'],

['theater', 'design'],

['theater', 'avant-garde', 'critical', 'anthology'],

['visual',

'judaism',

'late',

'antiquity',

'historical',

'context',

'jewish',

'art'],

['anthology', 'arabic', 'literature', 'culture', 'pre-islamic', 'time'],

['learn', 'read', 'greek', 'textbook', 'workbook', 'set'],

['relativity',

'gravitation',

'hundred',

'birth',

40

'albert',

'einstein',

'volume'],

['principle', 'quantum', 'mechanic'],

['psychotherapy', 'buddhism', 'integration'],

['handbook', 'urban', 'community', 'forestry', 'northeast'],

['molecular', 'immunology', 'complex', 'carbohydrate'],

['encyclopedia', 'diaspora', 'immigrant', 'refugee', 'culture', 'world'],

['capo',

'music',

'writing',

'finest',

'writing',

'rock',

'pop',

'jazz',

'country'],

['faith',

'journalist',

'investigates',

'toughest',

'objection',

'christianity'],

['basic', 'biblical', 'greek', 'workbook'],

['basic', 'biblical', 'greek', 'grammar'],

['stage', 'drama', 'classical', 'contemporary', 'theater'],

['exploring', 'american', 'history', 'volume', 'survey', 'source'],

['critical', 'vision', 'film', 'theory'],

['anatomy', 'film'],

['film'],

['contemporary', 'linguistics'],

['world', 'global', 'history', 'source', 'volume'],

['world', 'global', 'history', 'source', 'volume'],

['nvestigative', 'reporter', 'handbook', 'document', 'database', 'technique'],

['contemporary', 'linguistics', 'study'],

['america', 'concise', 'history', 'volume'],

['america', 'concise', 'history', 'volume'],

['rule', 'writer', 'writing', 'literature', 'tabbed', 'version'],

['literature', 'matter', 'anthology', 'reader', 'writer'],

['american', 'promise', 'volume', 'history', 'united'],

['history', 'world', 'society', 'volume'],

['woman',

'eye',

'american',

'history',

'document',

'woman',

'eye',

41

'american',

'history',

'document',

'dubois',

'ellen',

'carol',

'author',

'paperback'],

['film', 'experience'],

['ashe',

'traditional',

'religion',

'healing',

'sub-saharan',

'africa',

'diaspora',

'classified',

'international',

'bibliography',

'bibliography',

'index',

'afro-american',

'african',

'study'],

['broadway', 'encyclopedia', 'theater', 'american', 'culture'],

['feminism', 'woman', 'worldwide', 'volume', 'volume', 'woman', 'psychology'],

['material', 'criminal', 'law', 'american', 'casebook'],

['murray',

'flechtner',

'sale',

'lease',

'electronic',

'commerce',

'material',

'national',

'international',

'transaction'],

['sex-based', 'discrimination', 'text', 'material', 'american', 'casebook'],

['real', 'analysis'],

['longman', 'anthology', 'drama', 'theater', 'global', 'perspective'],

['writing', 'political', 'science'],

['computer', 'networking', 'internet'],

...]

In [10]: all_tokens[0]

Out[10]: ['philosophy', 'sex', 'love', 'reader']

In [11]: len(word_index_map)

42

Out[11]: 2070

In [12]: word_index_map["philosophy"]

Out[12]: 0

In [13]: check = pd.DataFrame(X)

check.iloc[:10,:80]

Out[13]: 0 1 2 3 4 5 6 7 8 9 ... 70 71 72 73 \

0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0

1 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 ... 0.0 0.0 0.0 0.0

2 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 ... 0.0 0.0 0.0 0.0

3 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0

4 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 ... 0.0 0.0 0.0 0.0

5 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 0.0 0.0

6 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 0.0 0.0

7 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 0.0 0.0

8 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0

9 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 1.0

74 75 76 77 78 79

0 0.0 0.0 0.0 0.0 0.0 0.0

1 0.0 0.0 0.0 0.0 0.0 0.0

2 0.0 0.0 0.0 0.0 0.0 0.0

3 0.0 0.0 0.0 0.0 0.0 0.0

4 0.0 0.0 0.0 0.0 0.0 0.0

5 0.0 0.0 0.0 0.0 0.0 0.0

6 0.0 0.0 0.0 0.0 0.0 0.0

7 0.0 0.0 0.0 0.0 0.0 0.0

8 0.0 0.0 0.0 0.0 0.0 0.0

9 0.0 0.0 0.0 0.0 0.0 0.0

[10 rows x 80 columns]

In [16]: from __future__ import print_function, division

from builtins import range

# Reduce X to two latent dimensions and plot every row as a labelled point.
# TruncatedSVD() is left at its default number of components; the recorded
# output of Z has two columns, which is what the 2-D scatter below relies on.
svd = TruncatedSVD()
Z = svd.fit_transform(X)

plt.scatter(Z[:, 0], Z[:, 1])

# Label point i with index_word_map[i] (presumably the word for row i of X;
# D is defined earlier in the notebook and is assumed to be the row count).
# The label is passed positionally: the `s` keyword of annotate was renamed
# to `text` in Matplotlib 3.3 and later removed, so `s=...` raises TypeError
# on current releases, while the positional form works on old and new alike.
for i in range(D):
    plt.annotate(index_word_map[i], xy=(Z[i, 0], Z[i, 1]))

plt.show()

# Get current size

fig_size = plt.rcParams["figure.figsize"]

# Prints the size in effect before the change below (e.g. [8.0, 6.0]; [40.0, 20.0] if this cell has already been run)

43

print("Current size:", fig_size)

# Set figure width to 40 and height to 20 (applies to figures created after this point)

fig_size[0] = 40

fig_size[1] = 20

plt.rcParams["figure.figsize"] = fig_size

Current size: [40.0, 20.0]

In [15]: pd.DataFrame(Z)

Out[15]: 0 1

0 0.082863 0.090046

1 0.137366 0.204138

2 0.025287 0.007821

3 0.071670 0.038627

4 0.161135 0.077085

5 0.164317 0.362638

6 0.091680 0.183416

7 0.129482 0.205153

8 0.143341 -0.016797

9 0.706761 -0.218631

10 0.007544 0.010085

11 0.007544 0.010085

12 0.007544 0.010085

13 0.074527 0.003684

14 0.139557 0.049312

15 0.986955 1.702502

44

16 0.248316 0.007972

17 0.086606 0.014585

18 0.211094 0.393591

19 0.032749 -0.006602

20 0.051950 -0.006406

21 0.348230 -0.037855

22 0.019748 -0.003450

23 0.012128 0.012000

24 0.873528 -0.231210

25 0.505580 0.645919

26 0.012128 0.012000

27 0.003020 0.004104

28 0.178211 0.297224

29 0.001877 0.002365

... ... ...

2040 0.005699 0.006780

2041 0.004367 0.002224

2042 0.004367 0.002224

2043 0.004367 0.002224

2044 0.004367 0.002224

2045 0.003226 0.004005

2046 0.004105 0.003348

2047 0.004105 0.003348

2048 0.017645 0.038801

2049 0.004105 0.003348

2050 0.004951 -0.001813

2051 0.004951 -0.001813

2052 0.004951 -0.001813

2053 0.004951 -0.001813

2054 0.004951 -0.001813

2055 0.013540 0.035453

2056 0.013540 0.035453

2057 0.013540 0.035453

2058 0.013540 0.035453

2059 0.013540 0.035453

2060 0.013540 0.035453

2061 0.013540 0.035453

2062 0.013540 0.035453

2063 0.011516 0.009690

2064 0.007192 0.005615

2065 0.011344 -0.001988

2066 0.010791 -0.000857

2067 0.010791 -0.000857

2068 0.010791 -0.000857

2069 0.010791 -0.000857

[2070 rows x 2 columns]

45