content.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. __filename__ = "content.py"
  2. __author__ = "Bob Mottram"
  3. __license__ = "AGPL3+"
  4. __version__ = "1.0.0"
  5. __maintainer__ = "Bob Mottram"
  6. __email__ = "bob@freedombone.net"
  7. __status__ = "Production"
import os
import re
from shutil import copyfile

import commentjson
  11. def addMusicTag(content: str,tag: str) -> str:
  12. """If a music link is found then ensure that the post is tagged appropriately
  13. """
  14. if '#' not in tag:
  15. tag='#'+tag
  16. if tag in content:
  17. return content
  18. musicSites=['soundcloud.com','bandcamp.com']
  19. musicSiteFound=False
  20. for site in musicSites:
  21. if site+'/' in content:
  22. musicSiteFound=True
  23. break
  24. if not musicSiteFound:
  25. return content
  26. return content+' '+tag+' '
  27. def addWebLinks(content: str) -> str:
  28. """Adds markup for web links
  29. """
  30. if not ('https://' in content or \
  31. 'http://' in content or \
  32. 'dat://' in content):
  33. return content
  34. words=content.replace('\n',' --linebreak--').split(' ')
  35. replaceDict={}
  36. for w in words:
  37. if w.startswith('https://') or \
  38. w.startswith('http://') or \
  39. w.startswith('dat://'):
  40. if w.endswith('.') or w.endswith(';'):
  41. w=w[:-1]
  42. markup='<a href="'+w+'" rel="nofollow noopener" target="_blank">'
  43. if w.startswith('https://'):
  44. markup+='<span class="invisible">https://</span>'
  45. elif w.startswith('http://'):
  46. markup+='<span class="invisible">http://</span>'
  47. elif w.startswith('dat://'):
  48. markup+='<span class="invisible">dat://</span>'
  49. markup+='<span class="ellipsis">'+ \
  50. w.replace('https://','').replace('http://','').replace('dat://','')+ \
  51. '</span></a>'
  52. replaceDict[w]=markup
  53. for url,markup in replaceDict.items():
  54. content=content.replace(url,markup)
  55. content=content.replace(' --linebreak--','<br>')
  56. return content
  57. def validHashTag(hashtag: str) -> bool:
  58. """Returns true if the give hashtag contains valid characters
  59. """
  60. validChars = \
  61. set('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
  62. if set(hashtag).issubset(validChars):
  63. return True
  64. return False
  65. def addHashTags(wordStr: str, \
  66. httpPrefix: str,domain: str, \
  67. replaceHashTags: {},postHashtags: {}) -> bool:
  68. """Detects hashtags and adds them to the replacements dict
  69. Also updates the hashtags list to be added to the post
  70. """
  71. if not wordStr.startswith('#'):
  72. return False
  73. if len(wordStr)<2:
  74. return False
  75. if replaceHashTags.get(wordStr):
  76. return True
  77. hashtag=wordStr[1:]
  78. if not validHashTag(hashtag):
  79. return False
  80. hashtagUrl=httpPrefix+"://"+domain+"/tags/"+hashtag
  81. postHashtags[hashtag]= {
  82. 'href': hashtagUrl,
  83. 'name': '#'+hashtag,
  84. 'type': 'Hashtag'
  85. }
  86. replaceHashTags[wordStr]= \
  87. "<a href=\""+hashtagUrl+ \
  88. "\" class=\"mention hashtag\" rel=\"tag\">#<span>"+ \
  89. hashtag+"</span></a>"
  90. return True
  91. def loadEmojiDict(emojiDataFilename: str,emojiDict: {}) -> None:
  92. """Creates an emoji dictionary based on emoji/emoji-data.txt
  93. """
  94. if not os.path.isfile(emojiDataFilename):
  95. return
  96. with open (emojiDataFilename, "r") as fileHandler:
  97. for line in fileHandler:
  98. if len(line)<5:
  99. continue
  100. if line.startswith('#'):
  101. continue
  102. if '; Emoji' not in line:
  103. continue
  104. if ')' not in line:
  105. continue
  106. emojiUnicode=line.split(' ')[0]
  107. if len(emojiUnicode)<4:
  108. continue
  109. if '..' in emojiUnicode:
  110. emojiUnicode=emojiUnicode.split('..')[0]
  111. emojiName= \
  112. line.split(')',1)[1].strip().replace('\n','').replace(' ','').replace('-','')
  113. if '..' in emojiName:
  114. emojiName=emojiName.split('..')[0]
  115. emojiDict[emojiName.lower()]=emojiUnicode
  116. def addEmoji(baseDir: str,wordStr: str, \
  117. httpPrefix: str,domain: str, \
  118. replaceEmoji: {},postTags: {},emojiDict: {}) -> bool:
  119. """Detects Emoji and adds them to the replacements dict
  120. Also updates the tags list to be added to the post
  121. """
  122. if not wordStr.startswith(':'):
  123. return False
  124. if not wordStr.endswith(':'):
  125. return False
  126. if len(wordStr)<3:
  127. return False
  128. if replaceEmoji.get(wordStr):
  129. return True
  130. emoji=wordStr[1:]
  131. emoji=emoji[:-1]
  132. if not validHashTag(emoji):
  133. return False
  134. if not emojiDict.get(emoji):
  135. return False
  136. emojiFilename=baseDir+'/emoji/'+emojiDict[emoji]+'.png'
  137. if not os.path.isfile(emojiFilename):
  138. return False
  139. emojiUrl=httpPrefix+"://"+domain+"/emoji/"+emojiDict[emoji]+'.png'
  140. postTags[emoji]= {
  141. 'icon': {
  142. 'mediaType': 'image/png',
  143. 'type': 'Image',
  144. 'url': emojiUrl
  145. },
  146. 'name': ':'+emoji+':',
  147. 'type': 'Emoji'
  148. }
  149. return True
  150. def addMention(wordStr: str, \
  151. httpPrefix: str,following: str, \
  152. replaceMentions: {},recipients: [],tags: {}) -> bool:
  153. """Detects mentions and adds them to the replacements dict and recipients list
  154. """
  155. if not wordStr.startswith('@'):
  156. return False
  157. if len(wordStr)<2:
  158. return False
  159. possibleHandle=wordStr[1:]
  160. # @nick
  161. if following and '@' not in possibleHandle:
  162. # fall back to a best effort match against the following list
  163. # if no domain was specified. eg. @nick
  164. possibleNickname=possibleHandle
  165. for follow in following:
  166. if follow.startswith(possibleNickname+'@'):
  167. replaceDomain=follow.replace('\n','').split('@')[1]
  168. recipientActor= \
  169. httpPrefix+"://"+replaceDomain+"/users/"+possibleNickname
  170. if recipientActor not in recipients:
  171. recipients.append(recipientActor)
  172. tags[wordStr]={
  173. 'href': recipientActor,
  174. 'name': wordStr,
  175. 'type': 'Mention'
  176. }
  177. replaceMentions[wordStr]= \
  178. "<span class=\"h-card\"><a href=\""+httpPrefix+ \
  179. "://"+replaceDomain+"/@"+possibleNickname+ \
  180. "\" class=\"u-url mention\">@<span>"+ \
  181. possibleNickname+"</span></a></span>"
  182. return True
  183. return False
  184. possibleNickname=possibleHandle.split('@')[0]
  185. possibleDomain=possibleHandle.split('@')[1].strip('\n')
  186. if following:
  187. for follow in following:
  188. if follow.replace('\n','')!=possibleHandle:
  189. continue
  190. recipientActor= \
  191. httpPrefix+"://"+possibleDomain+"/users/"+possibleNickname
  192. if recipientActor not in recipients:
  193. recipients.append(recipientActor)
  194. tags[wordStr]={
  195. 'href': recipientActor,
  196. 'name': wordStr,
  197. 'type': 'Mention'
  198. }
  199. replaceMentions[wordStr]= \
  200. "<span class=\"h-card\"><a href=\""+httpPrefix+ \
  201. "://"+possibleDomain+"/@"+possibleNickname+ \
  202. "\" class=\"u-url mention\">@<span>"+possibleNickname+ \
  203. "</span></a></span>"
  204. return True
  205. # @nick@domain
  206. if '@' in possibleHandle:
  207. if not (possibleDomain=='localhost' or '.' in possibleDomain):
  208. return False
  209. recipientActor= \
  210. httpPrefix+"://"+possibleDomain+"/users/"+possibleNickname
  211. if recipientActor not in recipients:
  212. recipients.append(recipientActor)
  213. tags[wordStr]={
  214. 'href': recipientActor,
  215. 'name': wordStr,
  216. 'type': 'Mention'
  217. }
  218. replaceMentions[wordStr]= \
  219. "<span class=\"h-card\"><a href=\""+httpPrefix+ \
  220. "://"+possibleDomain+"/@"+possibleNickname+ \
  221. "\" class=\"u-url mention\">@<span>"+possibleNickname+ \
  222. "</span></a></span>"
  223. return True
  224. return False
  225. def addHtmlTags(baseDir: str,httpPrefix: str, \
  226. nickname: str,domain: str,content: str, \
  227. recipients: [],hashtags: {}) -> str:
  228. """ Replaces plaintext mentions such as @nick@domain into html
  229. by matching against known following accounts
  230. """
  231. if content.startswith('<p>'):
  232. return content
  233. content=content.replace('\n',' --linebreak-- ')
  234. content=addMusicTag(content,'nowplaying')
  235. words=content.replace(',',' ').replace(';',' ').split(' ')
  236. # remove . for words which are not mentions
  237. wordCtr=0
  238. newWords=[]
  239. for wordIndex in range(0,len(words)):
  240. wordStr=words[wordIndex]
  241. if wordStr.endswith('.'):
  242. if not wordStr.startswith('@'):
  243. wordStr=wordStr[:-1]
  244. if wordStr.startswith('.'):
  245. wordStr=wordStr[1:]
  246. newWords.append(wordStr)
  247. words=newWords
  248. replaceMentions={}
  249. replaceHashTags={}
  250. replaceEmoji={}
  251. emojiDict={}
  252. originalDomain=domain
  253. if ':' in domain:
  254. domain=domain.split(':')[0]
  255. followingFilename=baseDir+'/accounts/'+nickname+'@'+domain+'/following.txt'
  256. # read the following list so that we can detect just @nick
  257. # in addition to @nick@domain
  258. following=None
  259. if os.path.isfile(followingFilename):
  260. with open(followingFilename, "r") as f:
  261. following = f.readlines()
  262. # extract mentions and tags from words
  263. for wordStr in words:
  264. if addMention(wordStr,httpPrefix,following, \
  265. replaceMentions,recipients,hashtags):
  266. continue
  267. if addHashTags(wordStr,httpPrefix,originalDomain, \
  268. replaceHashTags,hashtags):
  269. continue
  270. if len(wordStr)>2 and wordStr.startswith(':') and \
  271. wordStr.endswith(':') and not emojiDict:
  272. print('Loading emoji lookup')
  273. # emoji.json is generated so that it can be customized and the changes
  274. # will be retained even if default_emoji.json is subsequently updated
  275. if not os.path.isfile(baseDir+'/emoji/emoji.json'):
  276. copyfile(baseDir+'/emoji/default_emoji.json', \
  277. baseDir+'/emoji/emoji.json')
  278. with open(baseDir+'/emoji/emoji.json', 'r') as fp:
  279. emojiDict=commentjson.load(fp)
  280. addEmoji(baseDir,wordStr,httpPrefix,originalDomain, \
  281. replaceEmoji,hashtags,emojiDict)
  282. # replace words with their html versions
  283. for wordStr,replaceStr in replaceMentions.items():
  284. content=content.replace(wordStr,replaceStr)
  285. for wordStr,replaceStr in replaceHashTags.items():
  286. content=content.replace(wordStr,replaceStr)
  287. for wordStr,replaceStr in replaceEmoji.items():
  288. content=content.replace(wordStr,replaceStr)
  289. content=addWebLinks(content)
  290. content=content.replace(' --linebreak-- ','</p><p>')
  291. return '<p>'+content+'</p>'
  292. def getMentionsFromHtml(htmlText: str, \
  293. matchStr="<span class=\"h-card\"><a href=\"") -> []:
  294. """Extracts mentioned actors from the given html content string
  295. """
  296. mentions=[]
  297. if matchStr not in htmlText:
  298. return mentions
  299. mentionsList=htmlText.split(matchStr)
  300. for mentionStr in mentionsList:
  301. if '"' not in mentionStr:
  302. continue
  303. actorStr=mentionStr.split('"')[0]
  304. if actorStr.startswith('http') or \
  305. actorStr.startswith('dat:'):
  306. mentions.append(actorStr)
  307. return mentions