content.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. __filename__ = "content.py"
  2. __author__ = "Bob Mottram"
  3. __license__ = "AGPL3+"
  4. __version__ = "0.0.1"
  5. __maintainer__ = "Bob Mottram"
  6. __email__ = "bob@freedombone.net"
  7. __status__ = "Production"
  8. import os
  9. import commentjson
  10. from shutil import copyfile
  11. def validHashTag(hashtag: str) -> bool:
  12. """Returns true if the give hashtag contains valid characters
  13. """
  14. validChars = set('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
  15. if set(hashtag).issubset(validChars):
  16. return True
  17. return False
  18. def addHashTags(wordStr: str,httpPrefix: str,domain: str,replaceHashTags: {},postHashtags: {}) -> bool:
  19. """Detects hashtags and adds them to the replacements dict
  20. Also updates the hashtags list to be added to the post
  21. """
  22. if not wordStr.startswith('#'):
  23. return False
  24. if len(wordStr)<2:
  25. return False
  26. if replaceHashTags.get(wordStr):
  27. return True
  28. hashtag=wordStr[1:]
  29. if not validHashTag(hashtag):
  30. return False
  31. hashtagUrl=httpPrefix+"://"+domain+"/tags/"+hashtag
  32. postHashtags[hashtag]= {
  33. 'href': hashtagUrl,
  34. 'name': '#'+hashtag,
  35. 'type': 'Hashtag'
  36. }
  37. replaceHashTags[wordStr]= \
  38. "<a href=\""+hashtagUrl+"\" class=\"mention hashtag\" rel=\"tag\">#<span>"+hashtag+"</span></a>"
  39. return True
  40. def loadEmojiDict(emojiDataFilename: str,emojiDict: {}) -> None:
  41. """Creates an emoji dictionary based on emoji/emoji-data.txt
  42. """
  43. if not os.path.isfile(emojiDataFilename):
  44. return
  45. with open (emojiDataFilename, "r") as fileHandler:
  46. for line in fileHandler:
  47. if len(line)<5:
  48. continue
  49. if line.startswith('#'):
  50. continue
  51. if '; Emoji' not in line:
  52. continue
  53. if ')' not in line:
  54. continue
  55. emojiUnicode=line.split(' ')[0]
  56. if len(emojiUnicode)<4:
  57. continue
  58. if '..' in emojiUnicode:
  59. emojiUnicode=emojiUnicode.split('..')[0]
  60. emojiName=line.split(')',1)[1].strip().replace('\n','').replace(' ','').replace('-','')
  61. if '..' in emojiName:
  62. emojiName=emojiName.split('..')[0]
  63. emojiDict[emojiName.lower()]=emojiUnicode
  64. def addEmoji(baseDir: str,wordStr: str,httpPrefix: str,domain: str,replaceEmoji: {},postTags: {},emojiDict: {}) -> bool:
  65. """Detects Emoji and adds them to the replacements dict
  66. Also updates the tags list to be added to the post
  67. """
  68. if not wordStr.startswith(':'):
  69. return False
  70. if not wordStr.endswith(':'):
  71. return False
  72. if len(wordStr)<3:
  73. return False
  74. if replaceEmoji.get(wordStr):
  75. return True
  76. emoji=wordStr[1:]
  77. emoji=emoji[:-1]
  78. if not validHashTag(emoji):
  79. return False
  80. if not emojiDict.get(emoji):
  81. return False
  82. emojiFilename=baseDir+'/emoji/'+emojiDict[emoji]+'.png'
  83. if not os.path.isfile(emojiFilename):
  84. return False
  85. emojiUrl=httpPrefix+"://"+domain+"/emoji/"+emojiDict[emoji]+'.png'
  86. postTags[emoji]= {
  87. 'href': emojiUrl,
  88. 'name': ':'+emoji+':',
  89. 'type': 'Emoji'
  90. }
  91. replaceEmoji[wordStr]= \
  92. "<img src=\""+emojiUrl+"\" alt=\""+emoji+"\" align=\"middle\" class=\"emoji\"/>"
  93. return True
  94. def addMention(wordStr: str,httpPrefix: str,following: str,replaceMentions: {},recipients: []) -> bool:
  95. """Detects mentions and adds them to the replacements dict and recipients list
  96. """
  97. if not wordStr.startswith('@'):
  98. return False
  99. if len(wordStr)<2:
  100. return False
  101. possibleHandle=wordStr[1:]
  102. if '@' not in possibleHandle:
  103. # fall back to a best effort match against the following list
  104. # if no domain was specified. eg. @nick
  105. possibleNickname=possibleHandle
  106. for follow in following:
  107. if follow.startswith(possibleNickname+'@'):
  108. replaceDomain=follow.replace('\n','').split('@')[1]
  109. recipientActor=httpPrefix+"://"+replaceDomain+"/users/"+possibleNickname
  110. if recipientActor not in recipients:
  111. recipients.append(recipientActor)
  112. replaceMentions[wordStr]="<span class=\"h-card\"><a href=\""+httpPrefix+"://"+replaceDomain+"/@"+possibleNickname+"\" class=\"u-url mention\">@<span>"+possibleNickname+"</span></a></span>"
  113. replaceFound=True
  114. return True
  115. return False
  116. possibleNickname=possibleHandle.split('@')[0]
  117. possibleDomain=possibleHandle.split('@')[1]
  118. for follow in following:
  119. if follow.replace('\n','')==possibleHandle:
  120. recipientActor=httpPrefix+"://"+possibleDomain+"/users/"+possibleNickname
  121. if recipientActor not in recipients:
  122. recipients.append(recipientActor)
  123. replaceMentions[wordStr]="<span class=\"h-card\"><a href=\""+httpPrefix+"://"+possibleDomain+"/@"+possibleNickname+"\" class=\"u-url mention\">@<span>"+possibleNickname+"</span></a></span>"
  124. return True
  125. return False
  126. def addHtmlTags(baseDir: str,httpPrefix: str, \
  127. nickname: str,domain: str,content: str, \
  128. recipients: [],hashtags: {}) -> str:
  129. """ Replaces plaintext mentions such as @nick@domain into html
  130. by matching against known following accounts
  131. """
  132. if content.startswith('<p>'):
  133. return content
  134. wordsOnly=content.replace(',',' ').replace(';',' ').replace('.',' ')
  135. words=wordsOnly.split(' ')
  136. replaceMentions={}
  137. replaceHashTags={}
  138. replaceEmoji={}
  139. emojiDict={}
  140. originalDomain=domain
  141. if ':' in domain:
  142. domain=domain.split(':')[0]
  143. followingFilename=baseDir+'/accounts/'+nickname+'@'+domain+'/following.txt'
  144. # read the following list so that we can detect just @nick
  145. # in addition to @nick@domain
  146. following=None
  147. if os.path.isfile(followingFilename):
  148. with open(followingFilename, "r") as f:
  149. following = f.readlines()
  150. # extract mentions and tags from words
  151. for wordStr in words:
  152. if following:
  153. if addMention(wordStr,httpPrefix,following,replaceMentions,recipients):
  154. continue
  155. if addHashTags(wordStr,httpPrefix,originalDomain,replaceHashTags,hashtags):
  156. continue
  157. if len(wordStr)>2 and wordStr.startswith(':') and wordStr.endswith(':') and not emojiDict:
  158. print('Loading emoji lookup')
  159. if not os.path.isfile(baseDir+'/emoji/emoji.json'):
  160. copyfile(baseDir+'/emoji/default_emoji.json',baseDir+'/emoji/emoji.json')
  161. with open(baseDir+'/emoji/emoji.json', 'r') as fp:
  162. emojiDict=commentjson.load(fp)
  163. addEmoji(baseDir,wordStr,httpPrefix,originalDomain,replaceEmoji,hashtags,emojiDict)
  164. # replace words with their html versions
  165. for wordStr,replaceStr in replaceMentions.items():
  166. content=content.replace(wordStr,replaceStr)
  167. for wordStr,replaceStr in replaceHashTags.items():
  168. content=content.replace(wordStr,replaceStr)
  169. for wordStr,replaceStr in replaceEmoji.items():
  170. content=content.replace(wordStr,replaceStr)
  171. content=content.replace('\n','</p><p>')
  172. return '<p>'+content+'</p>'
  173. def getMentionsFromHtml(htmlText: str,matchStr="<span class=\"h-card\"><a href=\"") -> []:
  174. """Extracts mentioned actors from the given html content string
  175. """
  176. mentions=[]
  177. if matchStr not in htmlText:
  178. return mentions
  179. mentionsList=htmlText.split(matchStr)
  180. for mentionStr in mentionsList:
  181. if '"' not in mentionStr:
  182. continue
  183. actorStr=mentionStr.split('"')[0]
  184. if actorStr.startswith('http') or \
  185. actorStr.startswith('dat:'):
  186. mentions.append(actorStr)
  187. return mentions