123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509 |
- __filename__ = "content.py"
- __author__ = "Bob Mottram"
- __license__ = "AGPL3+"
- __version__ = "1.0.0"
- __maintainer__ = "Bob Mottram"
- __email__ = "bob@freedombone.net"
- __status__ = "Production"
- import os
- import time
- import email.parser
- from shutil import copyfile
- from utils import loadJson
- def replaceEmojiFromTags(content: str,tag: [],messageType: str) -> str:
- """Uses the tags to replace :emoji: with html image markup
- """
- for tagItem in tag:
- if not tagItem.get('type'):
- continue
- if tagItem['type']!='Emoji':
- continue
- if not tagItem.get('name'):
- continue
- if not tagItem.get('icon'):
- continue
- if not tagItem['icon'].get('url'):
- continue
- if tagItem['name'] not in content:
- continue
- htmlClass='emoji'
- if messageType=='post header':
- htmlClass='emojiheader'
- if messageType=='profile':
- htmlClass='emojiprofile'
- emojiHtml="<img src=\""+tagItem['icon']['url']+"\" alt=\""+tagItem['name'].replace(':','')+"\" align=\"middle\" class=\""+htmlClass+"\"/>"
- content=content.replace(tagItem['name'],emojiHtml)
- return content
- def addMusicTag(content: str,tag: str) -> str:
- """If a music link is found then ensure that the post is tagged appropriately
- """
- if '#' not in tag:
- tag='#'+tag
- if tag in content:
- return content
- musicSites=['soundcloud.com','bandcamp.com']
- musicSiteFound=False
- for site in musicSites:
- if site+'/' in content:
- musicSiteFound=True
- break
- if not musicSiteFound:
- return content
- return ':music: '+content+' '+tag+' '
- def addWebLinks(content: str) -> str:
- """Adds markup for web links
- """
- if not ('https://' in content or 'http://' in content):
- return content
- maxLinkLength=40
- words=content.replace('\n',' --linebreak-- ').split(' ')
- replaceDict={}
- for w in words:
- if w.startswith('https://') or \
- w.startswith('http://') or \
- w.startswith('dat://'):
- if w.endswith('.') or w.endswith(';'):
- w=w[:-1]
- markup='<a href="'+w+'" rel="nofollow noopener" target="_blank">'
- if w.startswith('https://'):
- markup+='<span class="invisible">https://</span>'
- elif w.startswith('http://'):
- markup+='<span class="invisible">http://</span>'
- elif w.startswith('dat://'):
- markup+='<span class="invisible">dat://</span>'
- linkText=w.replace('https://','').replace('http://','').replace('dat://','')
- # prevent links from becoming too long
- if len(linkText)>maxLinkLength:
- markup+='<span class="ellipsis">'+linkText[:maxLinkLength]+'</span>'
- markup+='<span class="invisible">'+linkText[maxLinkLength:]+'</span></a>'
- else:
- markup+='<span class="ellipsis">'+linkText+'</span></a>'
- replaceDict[w]=markup
- for url,markup in replaceDict.items():
- content=content.replace(url,markup)
- content=content.replace(' --linebreak-- ','<br>')
- return content
- def validHashTag(hashtag: str) -> bool:
- """Returns true if the give hashtag contains valid characters
- """
- validChars = set('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
- if set(hashtag).issubset(validChars):
- return True
- return False
- def addHashTags(wordStr: str,httpPrefix: str,domain: str,replaceHashTags: {},postHashtags: {}) -> bool:
- """Detects hashtags and adds them to the replacements dict
- Also updates the hashtags list to be added to the post
- """
- if replaceHashTags.get(wordStr):
- return True
- hashtag=wordStr[1:]
- if not validHashTag(hashtag):
- return False
- hashtagUrl=httpPrefix+"://"+domain+"/tags/"+hashtag
- postHashtags[hashtag]= {
- 'href': hashtagUrl,
- 'name': '#'+hashtag,
- 'type': 'Hashtag'
- }
- replaceHashTags[wordStr]= \
- "<a href=\""+hashtagUrl+"\" class=\"mention hashtag\" rel=\"tag\">#<span>"+hashtag+"</span></a>"
- return True
- def loadEmojiDict(emojiDataFilename: str,emojiDict: {}) -> None:
- """Creates an emoji dictionary based on emoji/emoji-data.txt
- """
- if not os.path.isfile(emojiDataFilename):
- return
- with open (emojiDataFilename, "r") as fileHandler:
- for line in fileHandler:
- if len(line)<5:
- continue
- if line.startswith('#'):
- continue
- if '; Emoji' not in line:
- continue
- if ')' not in line:
- continue
- emojiUnicode=line.split(' ')[0]
- if len(emojiUnicode)<4:
- continue
- if '..' in emojiUnicode:
- emojiUnicode=emojiUnicode.split('..')[0]
- emojiName=line.split(')',1)[1].strip().replace('\n','').replace(' ','').replace('-','')
- if '..' in emojiName:
- emojiName=emojiName.split('..')[0]
- emojiDict[emojiName.lower()]=emojiUnicode
- def addEmoji(baseDir: str,wordStr: str,httpPrefix: str,domain: str,replaceEmoji: {},postTags: {},emojiDict: {}) -> bool:
- """Detects Emoji and adds them to the replacements dict
- Also updates the tags list to be added to the post
- """
- if not wordStr.startswith(':'):
- return False
- if not wordStr.endswith(':'):
- return False
- if len(wordStr)<3:
- return False
- if replaceEmoji.get(wordStr):
- return True
- # remove leading and trailing : characters
- emoji=wordStr[1:]
- emoji=emoji[:-1]
- # is the text of the emoji valid?
- if not validHashTag(emoji):
- return False
- if not emojiDict.get(emoji):
- return False
- emojiFilename=baseDir+'/emoji/'+emojiDict[emoji]+'.png'
- if not os.path.isfile(emojiFilename):
- return False
- emojiUrl=httpPrefix+"://"+domain+"/emoji/"+emojiDict[emoji]+'.png'
- postTags[emoji]= {
- 'icon': {
- 'mediaType': 'image/png',
- 'type': 'Image',
- 'url': emojiUrl
- },
- 'name': ':'+emoji+':',
- 'type': 'Emoji'
- }
- return True
- def addMention(wordStr: str,httpPrefix: str,following: str,replaceMentions: {},recipients: [],tags: {}) -> bool:
- """Detects mentions and adds them to the replacements dict and recipients list
- """
- possibleHandle=wordStr[1:]
- # @nick
- if following and '@' not in possibleHandle:
- # fall back to a best effort match against the following list
- # if no domain was specified. eg. @nick
- possibleNickname=possibleHandle
- for follow in following:
- if follow.startswith(possibleNickname+'@'):
- replaceDomain=follow.replace('\n','').split('@')[1]
- recipientActor=httpPrefix+"://"+replaceDomain+"/users/"+possibleNickname
- if recipientActor not in recipients:
- recipients.append(recipientActor)
- tags[wordStr]={
- 'href': recipientActor,
- 'name': wordStr,
- 'type': 'Mention'
- }
- replaceMentions[wordStr]="<span class=\"h-card\"><a href=\""+httpPrefix+"://"+replaceDomain+"/@"+possibleNickname+"\" class=\"u-url mention\">@<span>"+possibleNickname+"</span></a></span>"
- return True
- return False
- possibleNickname=None
- possibleDomain=None
- if '@' not in possibleHandle:
- return False
- possibleNickname=possibleHandle.split('@')[0]
- if not possibleNickname:
- return False
- possibleDomain=possibleHandle.split('@')[1].strip('\n')
- if not possibleDomain:
- return False
- if following:
- for follow in following:
- if follow.replace('\n','')!=possibleHandle:
- continue
- recipientActor=httpPrefix+"://"+possibleDomain+"/users/"+possibleNickname
- if recipientActor not in recipients:
- recipients.append(recipientActor)
- tags[wordStr]={
- 'href': recipientActor,
- 'name': wordStr,
- 'type': 'Mention'
- }
- replaceMentions[wordStr]="<span class=\"h-card\"><a href=\""+httpPrefix+"://"+possibleDomain+"/@"+possibleNickname+"\" class=\"u-url mention\">@<span>"+possibleNickname+"</span></a></span>"
- return True
- # @nick@domain
- if not (possibleDomain=='localhost' or '.' in possibleDomain):
- return False
- recipientActor=httpPrefix+"://"+possibleDomain+"/users/"+possibleNickname
- if recipientActor not in recipients:
- recipients.append(recipientActor)
- tags[wordStr]={
- 'href': recipientActor,
- 'name': wordStr,
- 'type': 'Mention'
- }
- replaceMentions[wordStr]="<span class=\"h-card\"><a href=\""+httpPrefix+"://"+possibleDomain+"/@"+possibleNickname+"\" class=\"u-url mention\">@<span>"+possibleNickname+"</span></a></span>"
- return True
- def removeLongWords(content: str,maxWordLength: int,longWordsList: []) -> str:
- """Breaks up long words so that on mobile screens this doesn't disrupt the layout
- """
- words=content.split(' ')
- if not longWordsList:
- longWordsList=[]
- for wordStr in words:
- if len(wordStr)>maxWordLength:
- if wordStr not in longWordsList:
- longWordsList.append(wordStr)
- for wordStr in longWordsList:
- if wordStr.startswith('<'):
- continue
- if '=\"' in wordStr:
- continue
- if '@' in wordStr:
- if '@@' not in wordStr:
- continue
- if 'https:' in wordStr:
- continue
- elif 'http:' in wordStr:
- continue
- elif 'dat:' in wordStr:
- continue
- if '<' in wordStr:
- wordStr=wordStr.split('<',1)[0]
- if '/' in wordStr:
- continue
- if len(wordStr[maxWordLength:])<maxWordLength:
- content= \
- content.replace(wordStr, \
- wordStr[:maxWordLength]+'\n'+ \
- wordStr[maxWordLength:])
- else:
- content= \
- content.replace(wordStr, \
- wordStr[:maxWordLength])
- return content
- def addHtmlTags(baseDir: str,httpPrefix: str, \
- nickname: str,domain: str,content: str, \
- recipients: [],hashtags: {},isJsonContent=False) -> str:
- """ Replaces plaintext mentions such as @nick@domain into html
- by matching against known following accounts
- """
- if content.startswith('<p>'):
- return content
- maxWordLength=40
- content=content.replace('\n',' --linebreak-- ')
- content=addMusicTag(content,'nowplaying')
- words=content.replace(',',' ').replace(';',' ').split(' ')
-
- # remove . for words which are not mentions
- wordCtr=0
- newWords=[]
- for wordIndex in range(0,len(words)):
- wordStr=words[wordIndex]
- if wordStr.endswith('.'):
- if not wordStr.startswith('@'):
- wordStr=wordStr[:-1]
- if wordStr.startswith('.'):
- wordStr=wordStr[1:]
- newWords.append(wordStr)
- words=newWords
- replaceMentions={}
- replaceHashTags={}
- replaceEmoji={}
- emojiDict={}
- originalDomain=domain
- if ':' in domain:
- domain=domain.split(':')[0]
- followingFilename=baseDir+'/accounts/'+nickname+'@'+domain+'/following.txt'
- # read the following list so that we can detect just @nick
- # in addition to @nick@domain
- following=None
- if '@' in words:
- if os.path.isfile(followingFilename):
- with open(followingFilename, "r") as f:
- following = f.readlines()
- # extract mentions and tags from words
- longWordsList=[]
- for wordStr in words:
- wordLen=len(wordStr)
- if wordLen>2:
- if wordLen>maxWordLength:
- longWordsList.append(wordStr)
- firstChar=wordStr[0]
- if firstChar=='@':
- if addMention(wordStr,httpPrefix,following,replaceMentions,recipients,hashtags):
- continue
- elif firstChar=='#':
- if addHashTags(wordStr,httpPrefix,originalDomain,replaceHashTags,hashtags):
- continue
- elif ':' in wordStr:
- #print('TAG: emoji located - '+wordStr)
- wordStr2=wordStr.split(':')[1]
- if not emojiDict:
- # emoji.json is generated so that it can be customized and the changes
- # will be retained even if default_emoji.json is subsequently updated
- if not os.path.isfile(baseDir+'/emoji/emoji.json'):
- copyfile(baseDir+'/emoji/default_emoji.json',baseDir+'/emoji/emoji.json')
- emojiDict=loadJson(baseDir+'/emoji/emoji.json')
- #print('TAG: looking up emoji for :'+wordStr2+':')
- addEmoji(baseDir,':'+wordStr2+':',httpPrefix,originalDomain,replaceEmoji,hashtags,emojiDict)
- # replace words with their html versions
- for wordStr,replaceStr in replaceMentions.items():
- content=content.replace(wordStr,replaceStr)
- for wordStr,replaceStr in replaceHashTags.items():
- content=content.replace(wordStr,replaceStr)
- if not isJsonContent:
- for wordStr,replaceStr in replaceEmoji.items():
- content=content.replace(wordStr,replaceStr)
- content=addWebLinks(content)
- if longWordsList:
- content=removeLongWords(content,maxWordLength,longWordsList)
- content=content.replace(' --linebreak-- ','</p><p>')
- return '<p>'+content+'</p>'
-
- def getMentionsFromHtml(htmlText: str,matchStr="<span class=\"h-card\"><a href=\"") -> []:
- """Extracts mentioned actors from the given html content string
- """
- mentions=[]
- if matchStr not in htmlText:
- return mentions
- mentionsList=htmlText.split(matchStr)
- for mentionStr in mentionsList:
- if '"' not in mentionStr:
- continue
- actorStr=mentionStr.split('"')[0]
- if actorStr.startswith('http') or \
- actorStr.startswith('dat:'):
- if actorStr not in mentions:
- mentions.append(actorStr)
- return mentions
- def extractMediaInFormPOST(postBytes,boundary,name: str):
- """Extracts the binary encoding for image/video/audio within a http form POST
- Returns the media bytes and the remaining bytes
- """
- imageStartBoundary=b'Content-Disposition: form-data; name="'+name.encode('utf8', 'ignore')+b'";'
- imageStartLocation=postBytes.find(imageStartBoundary)
- if imageStartLocation==-1:
- return None,postBytes
- # bytes after the start boundary appears
- mediaBytes=postBytes[imageStartLocation:]
- # look for the next boundary
- imageEndBoundary=boundary.encode('utf8', 'ignore')
- imageEndLocation=mediaBytes.find(imageEndBoundary)
- if imageEndLocation==-1:
- # no ending boundary
- return mediaBytes,postBytes[:imageStartLocation]
- # remaining bytes after the end of the image
- remainder=mediaBytes[imageEndLocation:]
- # remove bytes after the end boundary
- mediaBytes=mediaBytes[:imageEndLocation]
- # return the media and the before+after bytes
- return mediaBytes,postBytes[:imageStartLocation]+remainder
- def saveMediaInFormPOST(mediaBytes,debug: bool, \
- filenameBase=None) -> (str,str):
- """Saves the given media bytes extracted from http form POST
- Returns the filename and attachment type
- """
- if not mediaBytes:
- if debug:
- print('DEBUG: No media found within POST')
- return None,None
- mediaLocation=-1
- searchStr=''
- filename=None
-
- # directly search the binary array for the beginning
- # of an image
- extensionList= {
- 'png': 'image/png',
- 'jpeg': 'image/jpeg',
- 'gif': 'image/gif',
- 'webp': 'image/webp',
- 'mp4': 'video/mp4',
- 'ogv': 'video/ogv',
- 'mp3': 'audio/mpeg',
- 'ogg': 'audio/ogg'
- }
- detectedExtension=None
- for extension,contentType in extensionList.items():
- searchStr=b'Content-Type: '+contentType.encode('utf8', 'ignore')
- mediaLocation=mediaBytes.find(searchStr)
- if mediaLocation>-1:
- if extension=='jpeg':
- extension='jpg'
- elif extension=='mpeg':
- extension='mp3'
- filename=filenameBase+'.'+extension
- attachmentMediaType= \
- searchStr.decode().split('/')[0].replace('Content-Type: ','')
- detectedExtension=extension
- break
- if not filename:
- return None,None
- # locate the beginning of the image, after any
- # carriage returns
- startPos=mediaLocation+len(searchStr)
- for offset in range(1,8):
- if mediaBytes[startPos+offset]!=10:
- if mediaBytes[startPos+offset]!=13:
- startPos+=offset
- break
- # remove any existing image files with a different format
- extensionTypes=('png','jpg','jpeg','gif','webp')
- for ex in extensionTypes:
- if ex==detectedExtension:
- continue
- possibleOtherFormat=filename.replace('.temp','').replace('.'+detectedExtension,'.'+ex)
- if os.path.isfile(possibleOtherFormat):
- os.remove(possibleOtherFormat)
- fd = open(filename, 'wb')
- fd.write(mediaBytes[startPos:])
- fd.close()
- return filename,attachmentMediaType
- def extractTextFieldsInPOST(postBytes,boundary,debug: bool) -> {}:
- """Returns a dictionary containing the text fields of a http form POST
- The boundary argument comes from the http header
- """
- msg = email.parser.BytesParser().parsebytes(postBytes)
- if debug:
- print('DEBUG: POST arriving '+msg.get_payload(decode=True).decode('utf-8'))
- messageFields=msg.get_payload(decode=True).decode('utf-8').split(boundary)
- fields={}
- # examine each section of the POST, separated by the boundary
- for f in messageFields:
- if f=='--':
- continue
- if ' name="' not in f:
- continue
- postStr=f.split(' name="',1)[1]
- if '"' not in postStr:
- continue
- postKey=postStr.split('"',1)[0]
- postValueStr=postStr.split('"',1)[1]
- if ';' in postValueStr:
- continue
- if '\r\n' not in postValueStr:
- continue
- postLines=postValueStr.split('\r\n')
- postValue=''
- if len(postLines)>2:
- for line in range(2,len(postLines)-1):
- if line>2:
- postValue+='\n'
- postValue+=postLines[line]
- fields[postKey]=postValue
- return fields
|