content.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. __filename__ = "content.py"
  2. __author__ = "Bob Mottram"
  3. __license__ = "AGPL3+"
  4. __version__ = "1.0.0"
  5. __maintainer__ = "Bob Mottram"
  6. __email__ = "bob@freedombone.net"
  7. __status__ = "Production"
  8. import os
  9. import commentjson
  10. from shutil import copyfile
  11. def addMusicTag(content: str,tag: str) -> str:
  12. """If a music link is found then ensure that the post is tagged appropriately
  13. """
  14. if '#' not in tag:
  15. tag='#'+tag
  16. if tag in content:
  17. return content
  18. musicSites=['soundcloud.com','bandcamp.com']
  19. musicSiteFound=False
  20. for site in musicSites:
  21. if site+'/' in content:
  22. musicSiteFound=True
  23. break
  24. if not musicSiteFound:
  25. return content
  26. return content+' '+tag+' '
  27. def addWebLinks(content: str) -> str:
  28. """Adds markup for web links
  29. """
  30. if not ('https://' in content or 'http://' in content):
  31. return content
  32. words=content.replace('\n',' --linebreak--').split(' ')
  33. replaceDict={}
  34. for w in words:
  35. if w.startswith('https://') or w.startswith('http://'):
  36. if w.endswith('.') or w.endswith(';'):
  37. w=w[:-1]
  38. markup='<a href="'+w+'" rel="nofollow noopener" target="_blank">'
  39. if w.startswith('https://'):
  40. markup+='<span class="invisible">https://</span>'
  41. elif w.startswith('http://'):
  42. markup+='<span class="invisible">http://</span>'
  43. markup+='<span class="ellipsis">'+w.replace('https://','').replace('http://','')+'</span></a>'
  44. replaceDict[w]=markup
  45. for url,markup in replaceDict.items():
  46. content=content.replace(url,markup)
  47. content=content.replace(' --linebreak--','<br>')
  48. return content
  49. def validHashTag(hashtag: str) -> bool:
  50. """Returns true if the give hashtag contains valid characters
  51. """
  52. validChars = set('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
  53. if set(hashtag).issubset(validChars):
  54. return True
  55. return False
  56. def addHashTags(wordStr: str,httpPrefix: str,domain: str,replaceHashTags: {},postHashtags: {}) -> bool:
  57. """Detects hashtags and adds them to the replacements dict
  58. Also updates the hashtags list to be added to the post
  59. """
  60. if not wordStr.startswith('#'):
  61. return False
  62. if len(wordStr)<2:
  63. return False
  64. if replaceHashTags.get(wordStr):
  65. return True
  66. hashtag=wordStr[1:]
  67. if not validHashTag(hashtag):
  68. return False
  69. hashtagUrl=httpPrefix+"://"+domain+"/tags/"+hashtag
  70. postHashtags[hashtag]= {
  71. 'href': hashtagUrl,
  72. 'name': '#'+hashtag,
  73. 'type': 'Hashtag'
  74. }
  75. replaceHashTags[wordStr]= \
  76. "<a href=\""+hashtagUrl+"\" class=\"mention hashtag\" rel=\"tag\">#<span>"+hashtag+"</span></a>"
  77. return True
  78. def loadEmojiDict(emojiDataFilename: str,emojiDict: {}) -> None:
  79. """Creates an emoji dictionary based on emoji/emoji-data.txt
  80. """
  81. if not os.path.isfile(emojiDataFilename):
  82. return
  83. with open (emojiDataFilename, "r") as fileHandler:
  84. for line in fileHandler:
  85. if len(line)<5:
  86. continue
  87. if line.startswith('#'):
  88. continue
  89. if '; Emoji' not in line:
  90. continue
  91. if ')' not in line:
  92. continue
  93. emojiUnicode=line.split(' ')[0]
  94. if len(emojiUnicode)<4:
  95. continue
  96. if '..' in emojiUnicode:
  97. emojiUnicode=emojiUnicode.split('..')[0]
  98. emojiName=line.split(')',1)[1].strip().replace('\n','').replace(' ','').replace('-','')
  99. if '..' in emojiName:
  100. emojiName=emojiName.split('..')[0]
  101. emojiDict[emojiName.lower()]=emojiUnicode
  102. def addEmoji(baseDir: str,wordStr: str,httpPrefix: str,domain: str,replaceEmoji: {},postTags: {},emojiDict: {}) -> bool:
  103. """Detects Emoji and adds them to the replacements dict
  104. Also updates the tags list to be added to the post
  105. """
  106. if not wordStr.startswith(':'):
  107. return False
  108. if not wordStr.endswith(':'):
  109. return False
  110. if len(wordStr)<3:
  111. return False
  112. if replaceEmoji.get(wordStr):
  113. return True
  114. emoji=wordStr[1:]
  115. emoji=emoji[:-1]
  116. if not validHashTag(emoji):
  117. return False
  118. if not emojiDict.get(emoji):
  119. return False
  120. emojiFilename=baseDir+'/emoji/'+emojiDict[emoji]+'.png'
  121. if not os.path.isfile(emojiFilename):
  122. return False
  123. emojiUrl=httpPrefix+"://"+domain+"/emoji/"+emojiDict[emoji]+'.png'
  124. postTags[emoji]= {
  125. 'icon': {
  126. 'mediaType': 'image/png',
  127. 'type': 'Image',
  128. 'url': emojiUrl
  129. },
  130. 'name': ':'+emoji+':',
  131. 'type': 'Emoji'
  132. }
  133. return True
  134. def addMention(wordStr: str,httpPrefix: str,following: str,replaceMentions: {},recipients: [],tags: {}) -> bool:
  135. """Detects mentions and adds them to the replacements dict and recipients list
  136. """
  137. if not wordStr.startswith('@'):
  138. return False
  139. if len(wordStr)<2:
  140. return False
  141. possibleHandle=wordStr[1:]
  142. # @nick
  143. if following and '@' not in possibleHandle:
  144. # fall back to a best effort match against the following list
  145. # if no domain was specified. eg. @nick
  146. possibleNickname=possibleHandle
  147. for follow in following:
  148. if follow.startswith(possibleNickname+'@'):
  149. replaceDomain=follow.replace('\n','').split('@')[1]
  150. recipientActor=httpPrefix+"://"+replaceDomain+"/users/"+possibleNickname
  151. if recipientActor not in recipients:
  152. recipients.append(recipientActor)
  153. tags[wordStr]={
  154. 'href': recipientActor,
  155. 'name': wordStr,
  156. 'type': 'Mention'
  157. }
  158. replaceMentions[wordStr]="<span class=\"h-card\"><a href=\""+httpPrefix+"://"+replaceDomain+"/@"+possibleNickname+"\" class=\"u-url mention\">@<span>"+possibleNickname+"</span></a></span>"
  159. return True
  160. return False
  161. possibleNickname=possibleHandle.split('@')[0]
  162. possibleDomain=possibleHandle.split('@')[1].strip('\n')
  163. if following:
  164. for follow in following:
  165. if follow.replace('\n','')!=possibleHandle:
  166. continue
  167. recipientActor=httpPrefix+"://"+possibleDomain+"/users/"+possibleNickname
  168. if recipientActor not in recipients:
  169. recipients.append(recipientActor)
  170. tags[wordStr]={
  171. 'href': recipientActor,
  172. 'name': wordStr,
  173. 'type': 'Mention'
  174. }
  175. replaceMentions[wordStr]="<span class=\"h-card\"><a href=\""+httpPrefix+"://"+possibleDomain+"/@"+possibleNickname+"\" class=\"u-url mention\">@<span>"+possibleNickname+"</span></a></span>"
  176. return True
  177. # @nick@domain
  178. if '@' in possibleHandle:
  179. if not (possibleDomain=='localhost' or '.' in possibleDomain):
  180. return False
  181. recipientActor=httpPrefix+"://"+possibleDomain+"/users/"+possibleNickname
  182. if recipientActor not in recipients:
  183. recipients.append(recipientActor)
  184. tags[wordStr]={
  185. 'href': recipientActor,
  186. 'name': wordStr,
  187. 'type': 'Mention'
  188. }
  189. replaceMentions[wordStr]="<span class=\"h-card\"><a href=\""+httpPrefix+"://"+possibleDomain+"/@"+possibleNickname+"\" class=\"u-url mention\">@<span>"+possibleNickname+"</span></a></span>"
  190. return True
  191. return False
  192. def addHtmlTags(baseDir: str,httpPrefix: str, \
  193. nickname: str,domain: str,content: str, \
  194. recipients: [],hashtags: {}) -> str:
  195. """ Replaces plaintext mentions such as @nick@domain into html
  196. by matching against known following accounts
  197. """
  198. if content.startswith('<p>'):
  199. return content
  200. content=content.replace('\n',' --linebreak-- ')
  201. content=addMusicTag(content,'nowplaying')
  202. words=content.replace(',',' ').replace(';',' ').split(' ')
  203. # remove . for words which are not mentions
  204. wordCtr=0
  205. newWords=[]
  206. for wordIndex in range(0,len(words)):
  207. wordStr=words[wordIndex]
  208. if wordStr.endswith('.'):
  209. if not wordStr.startswith('@'):
  210. wordStr=wordStr[:-1]
  211. if wordStr.startswith('.'):
  212. wordStr=wordStr[1:]
  213. newWords.append(wordStr)
  214. words=newWords
  215. replaceMentions={}
  216. replaceHashTags={}
  217. replaceEmoji={}
  218. emojiDict={}
  219. originalDomain=domain
  220. if ':' in domain:
  221. domain=domain.split(':')[0]
  222. followingFilename=baseDir+'/accounts/'+nickname+'@'+domain+'/following.txt'
  223. # read the following list so that we can detect just @nick
  224. # in addition to @nick@domain
  225. following=None
  226. if os.path.isfile(followingFilename):
  227. with open(followingFilename, "r") as f:
  228. following = f.readlines()
  229. # extract mentions and tags from words
  230. for wordStr in words:
  231. if addMention(wordStr,httpPrefix,following,replaceMentions,recipients,hashtags):
  232. continue
  233. if addHashTags(wordStr,httpPrefix,originalDomain,replaceHashTags,hashtags):
  234. continue
  235. if len(wordStr)>2 and wordStr.startswith(':') and wordStr.endswith(':') and not emojiDict:
  236. print('Loading emoji lookup')
  237. if not os.path.isfile(baseDir+'/emoji/emoji.json'):
  238. copyfile(baseDir+'/emoji/default_emoji.json',baseDir+'/emoji/emoji.json')
  239. with open(baseDir+'/emoji/emoji.json', 'r') as fp:
  240. emojiDict=commentjson.load(fp)
  241. addEmoji(baseDir,wordStr,httpPrefix,originalDomain,replaceEmoji,hashtags,emojiDict)
  242. # replace words with their html versions
  243. for wordStr,replaceStr in replaceMentions.items():
  244. content=content.replace(wordStr,replaceStr)
  245. for wordStr,replaceStr in replaceHashTags.items():
  246. content=content.replace(wordStr,replaceStr)
  247. for wordStr,replaceStr in replaceEmoji.items():
  248. content=content.replace(wordStr,replaceStr)
  249. content=addWebLinks(content)
  250. content=content.replace(' --linebreak-- ','</p><p>')
  251. return '<p>'+content+'</p>'
  252. def getMentionsFromHtml(htmlText: str,matchStr="<span class=\"h-card\"><a href=\"") -> []:
  253. """Extracts mentioned actors from the given html content string
  254. """
  255. mentions=[]
  256. if matchStr not in htmlText:
  257. return mentions
  258. mentionsList=htmlText.split(matchStr)
  259. for mentionStr in mentionsList:
  260. if '"' not in mentionStr:
  261. continue
  262. actorStr=mentionStr.split('"')[0]
  263. if actorStr.startswith('http') or \
  264. actorStr.startswith('dat:'):
  265. mentions.append(actorStr)
  266. return mentions