In [1]:
# Sample passage (a short fable) whose word lengths are analysed in this notebook.
# The literal begins and ends with a newline because of the triple-quote layout.
text = """
Once, there were two friends who were crossing the jungle.
After some time, they saw a bear coming towards them.
Then, one of the friends quickly climbed the nearby tree, and the other one did not know how to climb the tree.
So he lays down on the ground, holding his breath.
The bear reaches towards him and sniffs him in the ear.
After some time, the bear left the place, thinking the man was dead.
Now the other friend climbs down and asks his friend, What did bear say to him in his ear?
He replied, "
To be safe from the fake friends."
"""
In [2]:
# Bare expression: the notebook echoes the string's repr, so line breaks appear as \n escapes.
text
Out[2]:
In [3]:
# print() renders the escapes, so the passage is shown line by line.
print(text)
In [7]:
# Strip commas, full stops, question marks and double quotes in one pass
# by deleting them through a translation table.
punctuation_table = str.maketrans("", "", ',.?"')
mod_text = text.translate(punctuation_table)
print(mod_text)
In [8]:
# Keep every character that is not a comma, full stop, question mark or double quote.
mod_text = "".join(ch for ch in text if ch not in ',.?"')
print(mod_text)
In [9]:
# Inspect the repr: the punctuation is gone, but the \n line breaks are still in the string.
mod_text
Out[9]:
In [ ]:
In [11]:
# Clean the passage in one pass: delete , . ? " and turn every line break
# into a space so the text can be split into words on spaces.
cleanup_table = str.maketrans("\n", " ", ',.?"')
mod_text = text.translate(cleanup_table)
print(mod_text)
In [12]:
# Inspect the repr again: the line breaks have now been replaced by spaces.
mod_text
Out[12]:
In [13]:
# Confirm the cleaned text is still a str before splitting it.
type(mod_text)
Out[13]:
In [14]:
# Splitting on a literal " " (rather than on any whitespace) yields an empty
# string wherever two spaces are adjacent or the text starts/ends with a space.
words = mod_text.split(" ")
In [16]:
# Show the raw split result, including any '' entries produced by adjacent spaces.
print(words)
In [17]:
# Stray cell: evaluates an empty list literal and binds no name.
[]
Out[17]:
In [30]:
# List the empty strings that the split on " " left behind.
[token for token in words if not token]
Out[30]:
In [32]:
# Drop the empty strings; filter(None, ...) keeps only truthy (non-empty) entries.
mod_words = list(filter(None, words))
In [33]:
# Show the word list after the empty strings have been removed.
print(mod_words)
In [38]:
# One entry per word: its length in characters.
word_lengths = list(map(len, mod_words))
In [39]:
# Show the length of every word, in the order the words appear in the passage.
print(word_lengths)
In [41]:
# Number of words: word_lengths holds one length per word.
n = len(word_lengths)
n
Out[41]:
In [43]:
# Total number of characters across all words (sum of the word lengths).
s = sum(word_lengths)
s
Out[43]:
In [44]:
# Mean word length computed by hand: total characters divided by the word count.
avg = s / n
avg
Out[44]:
In [47]:
import numpy as np
import statistics as st
In [46]:
# Cross-check the hand-computed mean with NumPy.
np.mean(word_lengths)
Out[46]:
In [48]:
# Cross-check the hand-computed mean with the standard-library statistics module.
st.mean(word_lengths)
Out[48]:
In [ ]:
In [51]:
# The median needs the values in ascending order; sort a copy so that
# word_lengths keeps the original word order.
sorted_lengths = list(word_lengths)
sorted_lengths.sort()
print(sorted_lengths)
What is the middle position?
In [52]:
# Half the word count locates the middle of the sorted list.
# `/` is true division, so this is a float and cannot be used directly as a list index.
n / 2
Out[52]:
Since n is even, there are two middle values: the 52nd and 53rd items of the sorted list, counting from 1.
In [53]:
# Lower of the two middle values. Python lists are zero-based, so for an
# even-length list the middle pair sits at indices n//2 - 1 and n//2;
# deriving the index from the length also avoids a hard-coded position.
sorted_lengths[len(sorted_lengths) // 2 - 1]
Out[53]:
In [54]:
# Upper of the two middle values: zero-based index n//2 of an even-length
# list, derived from the length instead of a hard-coded position.
sorted_lengths[len(sorted_lengths) // 2]
Out[54]:
In [55]:
# Median of an even-length list: average the middle pair, which sits at
# zero-based indices n//2 - 1 and n//2 (computed here, not hard-coded).
mid = len(sorted_lengths) // 2
(sorted_lengths[mid - 1] + sorted_lengths[mid]) / 2
Out[55]:
In [56]:
# Hand-computed median with indices derived from the list length.
# Odd length: the single middle element (zero-based index n//2).
# Even length: the mean of the middle pair at indices n//2 - 1 and n//2.
mid = len(sorted_lengths) // 2
if len(sorted_lengths) % 2:
    median = sorted_lengths[mid]
else:
    median = (sorted_lengths[mid - 1] + sorted_lengths[mid]) / 2
median
Out[56]:
In [57]:
# Cross-check the hand-computed median with NumPy (it does not need pre-sorted input).
np.median(word_lengths)
Out[57]:
In [58]:
# Cross-check the hand-computed median with the standard-library statistics module.
st.median(word_lengths)
Out[58]:
In [ ]:
In [59]:
# Most frequent word length according to the statistics module.
st.mode(word_lengths)
Out[59]:
The mode is the item that appears the highest number of times.
In [61]:
# Sanity check: empty strings were filtered out before measuring, so this should be 0.
word_lengths.count(0)
Out[61]:
In [62]:
# How many words are exactly 1 character long.
word_lengths.count(1)
Out[62]:
In [63]:
# How many words are exactly 2 characters long.
word_lengths.count(2)
Out[63]:
In [64]:
# How many words are exactly 3 characters long.
word_lengths.count(3)
Out[64]:
In [65]:
# How many words are exactly 4 characters long.
word_lengths.count(4)
Out[65]:
In [66]:
# How many words are exactly 5 characters long.
word_lengths.count(5)
Out[66]:
In [67]:
# Shortest word length: the lower bound of the range of lengths worth counting.
min(word_lengths)
Out[67]:
In [68]:
# Longest word length: the upper bound of the range of lengths worth counting.
max(word_lengths)
Out[68]:
In [72]:
# Print "<length> <count>" for every length from the shortest to the longest
# word. The bounds come from the data rather than a hard-coded 1..8, so the
# loop stays correct if the passage changes.
for x in range(min(word_lengths), max(word_lengths) + 1):
    print(x, word_lengths.count(x))
The mode is 3: it appears 37 times, more than any other word length.
In [ ]:
In [73]:
import collections as cl
In [74]:
# Counter tallies how many times each word length occurs, in a single pass.
cl.Counter(word_lengths)
Out[74]:
So we now have the frequency of each item.
In [75]:
# Keep the length -> frequency tally in a variable so it can be reused.
counter = cl.Counter(word_lengths)
In [76]:
import matplotlib.pyplot as plt
In [81]:
# Freeze the (length, frequency) pairs of the tally into a tuple of tuples.
tuples = (*counter.items(),)
tuples
Out[81]:
In [83]:
# Passing the (length, frequency) pairs to plt.plot as one argument treats them
# as a 2-D array and draws each column as its own line against the row index.
# Unpack the pairs instead and draw frequency against word length, with labels
# so the figure can be read on its own.
lengths, counts = zip(*sorted(tuples))
plt.bar(lengths, counts)
plt.xlabel("Word length (characters)")
plt.ylabel("Number of words")
plt.title("Word length frequency")
plt.show()
Out[83]:
In [ ]: