utils.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
# Module metadata: file name, author, licence, version and contact details.
__filename__ = "utils.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.0.0"
__maintainer__ = "Bob Mottram"
__email__ = "bob@freedombone.net"
__status__ = "Production"
import datetime
import os
import shutil
import time
from urllib.parse import urlparse

import commentjson
  13. def saveJson(jsonObject: {},filename: str) -> bool:
  14. """Saves json to a file
  15. """
  16. tries=0
  17. while tries<5:
  18. try:
  19. with open(filename, 'w') as fp:
  20. commentjson.dump(jsonObject, fp, indent=2, sort_keys=False)
  21. return True
  22. except:
  23. print('WARN: saveJson '+str(tries))
  24. time.sleep(1)
  25. tries+=1
  26. return False
  27. def loadJson(filename: str) -> {}:
  28. """Makes a few attempts to load a json formatted file
  29. """
  30. jsonObject=None
  31. tries=0
  32. while tries<5:
  33. try:
  34. with open(filename, 'r') as fp:
  35. jsonObject=commentjson.load(fp)
  36. break
  37. except:
  38. print('WARN: loadJson exception')
  39. time.sleep(2)
  40. tries+=1
  41. return jsonObject
  42. def getStatusNumber() -> (str,str):
  43. """Returns the status number and published date
  44. """
  45. currTime=datetime.datetime.utcnow()
  46. daysSinceEpoch=(currTime - datetime.datetime(1970,1,1)).days
  47. # status is the number of seconds since epoch
  48. statusNumber=str(((daysSinceEpoch*24*60*60) + (currTime.hour*60*60) + (currTime.minute*60) + currTime.second)*1000 + int(currTime.microsecond/1000))
  49. # See https://github.com/tootsuite/mastodon/blob/995f8b389a66ab76ec92d9a240de376f1fc13a38/lib/mastodon/snowflake.rb
  50. # use the leftover microseconds as the sequence number
  51. sequenceId=currTime.microsecond % 1000
  52. # shift by 16bits "sequence data"
  53. statusNumber=str((int(statusNumber)<<16)+sequenceId)
  54. published=currTime.strftime("%Y-%m-%dT%H:%M:%SZ")
  55. return statusNumber,published
  56. def isEvil(domain: str) -> bool:
  57. # https://www.youtube.com/watch?v=5qw1hcevmdU
  58. evilDomains=('gab.com','gabfed.com','spinster.xyz','kiwifarms.cc','djitter.com')
  59. for concentratedEvil in evilDomains:
  60. if domain.endswith(concentratedEvil):
  61. return True
  62. return False
  63. def createPersonDir(nickname: str,domain: str,baseDir: str,dirname: str) -> str:
  64. """Create a directory for a person
  65. """
  66. handle=nickname+'@'+domain
  67. if not os.path.isdir(baseDir+'/accounts/'+handle):
  68. os.mkdir(baseDir+'/accounts/'+handle)
  69. boxDir=baseDir+'/accounts/'+handle+'/'+dirname
  70. if not os.path.isdir(boxDir):
  71. os.mkdir(boxDir)
  72. return boxDir
  73. def createOutboxDir(nickname: str,domain: str,baseDir: str) -> str:
  74. """Create an outbox for a person
  75. """
  76. return createPersonDir(nickname,domain,baseDir,'outbox')
  77. def createInboxQueueDir(nickname: str,domain: str,baseDir: str) -> str:
  78. """Create an inbox queue and returns the feed filename and directory
  79. """
  80. return createPersonDir(nickname,domain,baseDir,'queue')
  81. def domainPermitted(domain: str, federationList: []):
  82. if len(federationList)==0:
  83. return True
  84. if ':' in domain:
  85. domain=domain.split(':')[0]
  86. if domain in federationList:
  87. return True
  88. return False
  89. def urlPermitted(url: str,federationList: [],capability: str):
  90. if isEvil(url):
  91. return False
  92. if not federationList:
  93. return True
  94. for domain in federationList:
  95. if domain in url:
  96. return True
  97. return False
  98. def getDisplayName(baseDir: str,actor: str,personCache: {}) -> str:
  99. """Returns the display name for the given actor
  100. """
  101. if '/statuses/' in actor:
  102. actor=actor.split('/statuses/')[0]
  103. if not personCache.get(actor):
  104. return None
  105. if personCache[actor].get('actor'):
  106. if personCache[actor]['actor'].get('name'):
  107. return personCache[actor]['actor']['name']
  108. else:
  109. # Try to obtain from the cached actors
  110. cachedActorFilename=baseDir+'/cache/actors/'+actor.replace('/','#')+'.json'
  111. if os.path.isfile(cachedActorFilename):
  112. actorJson=None
  113. tries=0
  114. while tries<5:
  115. try:
  116. with open(cachedActorFilename, 'r') as fp:
  117. actorJson=commentjson.load(fp)
  118. break
  119. except:
  120. print('WARN: getDisplayName')
  121. time.sleep(1)
  122. tries+=1
  123. if actorJson:
  124. if actorJson.get('name'):
  125. return(actorJson['name'])
  126. return None
  127. def getNicknameFromActor(actor: str) -> str:
  128. """Returns the nickname from an actor url
  129. """
  130. if '/users/' not in actor:
  131. if '/profile/' in actor:
  132. nickStr=actor.split('/profile/')[1].replace('@','')
  133. if '/' not in nickStr:
  134. return nickStr
  135. else:
  136. return nickStr.split('/')[0]
  137. if '/channel/' in actor:
  138. nickStr=actor.split('/channel/')[1].replace('@','')
  139. if '/' not in nickStr:
  140. return nickStr
  141. else:
  142. return nickStr.split('/')[0]
  143. # https://domain/@nick
  144. if '/@' in actor:
  145. nickStr=actor.split('/@')[1]
  146. if '/' in nickStr:
  147. nickStr=nickStr.split('/')[0]
  148. return nickStr
  149. return None
  150. nickStr=actor.split('/users/')[1].replace('@','')
  151. if '/' not in nickStr:
  152. return nickStr
  153. else:
  154. return nickStr.split('/')[0]
  155. def getDomainFromActor(actor: str) -> (str,int):
  156. """Returns the domain name from an actor url
  157. """
  158. port=None
  159. if '/profile/' in actor:
  160. domain = actor.split('/profile/')[0].replace('https://','').replace('http://','').replace('dat://','')
  161. else:
  162. if '/channel/' in actor:
  163. domain = actor.split('/channel/')[0].replace('https://','').replace('http://','').replace('dat://','')
  164. else:
  165. if '/users/' not in actor:
  166. domain = actor.replace('https://','').replace('http://','').replace('dat://','')
  167. if '/' in actor:
  168. domain=domain.split('/')[0]
  169. else:
  170. domain = actor.split('/users/')[0].replace('https://','').replace('http://','').replace('dat://','')
  171. if ':' in domain:
  172. port=int(domain.split(':')[1])
  173. domain=domain.split(':')[0]
  174. return domain,port
  175. def followPerson(baseDir: str,nickname: str, domain: str, \
  176. followNickname: str, followDomain: str, \
  177. federationList: [],debug: bool, \
  178. followFile='following.txt') -> bool:
  179. """Adds a person to the follow list
  180. """
  181. if not domainPermitted(followDomain.lower().replace('\n',''), \
  182. federationList):
  183. if debug:
  184. print('DEBUG: follow of domain '+followDomain+' not permitted')
  185. return False
  186. if debug:
  187. print('DEBUG: follow of domain '+followDomain)
  188. if ':' in domain:
  189. handle=nickname+'@'+domain.split(':')[0].lower()
  190. else:
  191. handle=nickname+'@'+domain.lower()
  192. if ':' in followDomain:
  193. handleToFollow=followNickname+'@'+followDomain.split(':')[0].lower()
  194. else:
  195. handleToFollow=followNickname+'@'+followDomain.lower()
  196. if not os.path.isdir(baseDir+'/accounts'):
  197. os.mkdir(baseDir+'/accounts')
  198. if not os.path.isdir(baseDir+'/accounts/'+handle):
  199. os.mkdir(baseDir+'/accounts/'+handle)
  200. filename=baseDir+'/accounts/'+handle+'/'+followFile
  201. if os.path.isfile(filename):
  202. if handleToFollow in open(filename).read():
  203. if debug:
  204. print('DEBUG: follow already exists')
  205. return True
  206. # prepend to follow file
  207. try:
  208. with open(filename, 'r+') as followFile:
  209. content = followFile.read()
  210. followFile.seek(0, 0)
  211. followFile.write(followNickname+'@'+followDomain+'\n'+content)
  212. if debug:
  213. print('DEBUG: follow added')
  214. return True
  215. except Exception as e:
  216. print('WARN: Failed to write entry to follow file '+filename+' '+str(e))
  217. if debug:
  218. print('DEBUG: creating new following file')
  219. with open(filename, "w") as followfile:
  220. followfile.write(followNickname+'@'+followDomain+'\n')
  221. return True
  222. def locatePost(baseDir: str,nickname: str,domain: str,postUrl: str,replies=False) -> str:
  223. """Returns the filename for the given status post url
  224. """
  225. if not replies:
  226. extension='json'
  227. else:
  228. extension='replies'
  229. # if this post in the shared inbox?
  230. handle='inbox@'+domain
  231. postUrl=postUrl.replace('/','#').replace('/activity','').strip()
  232. boxName='inbox'
  233. postFilename=baseDir+'/accounts/'+nickname+'@'+domain+'/'+boxName+'/'+postUrl+'.'+extension
  234. if os.path.isfile(postFilename):
  235. return postFilename
  236. boxName='outbox'
  237. postFilename=baseDir+'/accounts/'+nickname+'@'+domain+'/'+boxName+'/'+postUrl+'.'+extension
  238. if os.path.isfile(postFilename):
  239. return postFilename
  240. # if this post in the inbox of the person?
  241. boxName='inbox'
  242. postFilename=baseDir+'/accounts/'+nickname+'@'+domain+'/'+boxName+'/'+postUrl+'.'+extension
  243. if os.path.isfile(postFilename):
  244. return postFilename
  245. postFilename=baseDir+'/cache/announce/'+nickname+'/'+postUrl+'.'+extension
  246. if os.path.isfile(postFilename):
  247. return postFilename
  248. print('WARN: unable to locate '+nickname+' '+postUrl+'.'+extension)
  249. return None
  250. def removeAttachment(baseDir: str,httpPrefix: str,domain: str,postJson: {}):
  251. if not postJson.get('attachment'):
  252. return
  253. if not postJson['attachment'][0].get('url'):
  254. return
  255. if port:
  256. if port!=80 and port!=443:
  257. if ':' not in domain:
  258. domain=domain+':'+str(port)
  259. attachmentUrl=postJson['attachment'][0]['url']
  260. if not attachmentUrl:
  261. return
  262. mediaFilename=baseDir+'/'+attachmentUrl.replace(httpPrefix+'://'+domain+'/','')
  263. if os.path.isfile(mediaFilename):
  264. os.remove(mediaFilename)
  265. postJson['attachment']=[]
  266. def removeModerationPostFromIndex(baseDir: str,postUrl: str,debug: bool) -> None:
  267. """Removes a url from the moderation index
  268. """
  269. moderationIndexFile=baseDir+'/accounts/moderation.txt'
  270. if not os.path.isfile(moderationIndexFile):
  271. return
  272. postId=postUrl.replace('/activity','')
  273. if postId in open(moderationIndexFile).read():
  274. with open(moderationIndexFile, "r") as f:
  275. lines = f.readlines()
  276. with open(moderationIndexFile, "w+") as f:
  277. for line in lines:
  278. if line.strip("\n") != postId:
  279. f.write(line)
  280. else:
  281. if debug:
  282. print('DEBUG: removed '+postId+' from moderation index')
  283. def deletePost(baseDir: str,httpPrefix: str,nickname: str,domain: str,postFilename: str,debug: bool) -> None:
  284. """Recursively deletes a post and its replies and attachments
  285. """
  286. postJsonObject=None
  287. tries=0
  288. while tries<5:
  289. try:
  290. with open(postFilename, 'r') as fp:
  291. postJsonObject=commentjson.load(fp)
  292. break
  293. except:
  294. print('WARN: deletePost')
  295. time.sleep(1)
  296. tries+=1
  297. if postJsonObject:
  298. # don't allow deletion of bookmarked posts
  299. bookmarksIndexFilename=baseDir+'/accounts/'+nickname+'@'+domain+'/bookmarks.index'
  300. if os.path.isfile(bookmarksIndexFilename):
  301. bookmarkIndex=postFilename.split('/')[-1]+'\n'
  302. if bookmarkIndex in open(bookmarksIndexFilename).read():
  303. return
  304. # remove any attachment
  305. removeAttachment(baseDir,httpPrefix,domain,postJsonObject)
  306. hasObject=False
  307. if postJsonObject.get('object'):
  308. hasObject=True
  309. # remove from moderation index file
  310. if hasObject:
  311. if postJsonObject['object'].get('moderationStatus'):
  312. if postJsonObject.get('id'):
  313. postId=postJsonObject['id'].replace('/activity','')
  314. removeModerationPostFromIndex(baseDir,postId,debug)
  315. # remove any hashtags index entries
  316. removeHashtagIndex=False
  317. if hasObject:
  318. if hasObject and isinstance(postJsonObject['object'], dict):
  319. if postJsonObject['object'].get('content'):
  320. if '#' in postJsonObject['object']['content']:
  321. removeHashtagIndex=True
  322. if removeHashtagIndex:
  323. if postJsonObject['object'].get('id') and postJsonObject['object'].get('tag'):
  324. # get the id of the post
  325. postId=postJsonObject['object']['id'].replace('/activity','')
  326. for tag in postJsonObject['object']['tag']:
  327. if tag['type']!='Hashtag':
  328. continue
  329. # find the index file for this tag
  330. tagIndexFilename=baseDir+'/tags/'+tag['name'][1:]+'.txt'
  331. if not os.path.isfile(tagIndexFilename):
  332. continue
  333. # remove postId from the tag index file
  334. with open(tagIndexFilename, "r") as f:
  335. lines = f.readlines()
  336. with open(tagIndexFilename, "w+") as f:
  337. for line in lines:
  338. if line.strip("\n") != postId:
  339. f.write(line)
  340. # remove any replies
  341. repliesFilename=postFilename.replace('.json','.replies')
  342. if os.path.isfile(repliesFilename):
  343. if debug:
  344. print('DEBUG: removing replies to '+postFilename)
  345. with open(repliesFilename,'r') as f:
  346. for replyId in f:
  347. replyFile=locatePost(baseDir,nickname,domain,replyId)
  348. if replyFile:
  349. if os.path.isfile(replyFile):
  350. deletePost(baseDir,nickname,domain,replyFile,debug)
  351. # remove the replies file
  352. os.remove(repliesFilename)
  353. # finally, remove the post itself
  354. os.remove(postFilename)
  355. def validNickname(domain: str,nickname: str) -> bool:
  356. forbiddenChars=['.',' ','/','?',':',';','@']
  357. for c in forbiddenChars:
  358. if c in nickname:
  359. return False
  360. if nickname==domain:
  361. return False
  362. reservedNames=['inbox','dm','outbox','following','public','followers','profile','channel','capabilities','calendar','tlreplies','tlmedia','moderation']
  363. if nickname in reservedNames:
  364. return False
  365. return True
  366. def noOfAccounts(baseDir: str) -> bool:
  367. """Returns the number of accounts on the system
  368. """
  369. accountCtr=0
  370. for subdir, dirs, files in os.walk(baseDir+'/accounts'):
  371. for account in dirs:
  372. if '@' in account:
  373. if not account.startswith('inbox@'):
  374. accountCtr+=1
  375. return accountCtr
  376. def noOfActiveAccountsMonthly(baseDir: str,months: int) -> bool:
  377. """Returns the number of accounts on the system this month
  378. """
  379. accountCtr=0
  380. currTime=int(time.time())
  381. monthSeconds=int(60*60*24*30*months)
  382. for subdir, dirs, files in os.walk(baseDir+'/accounts'):
  383. for account in dirs:
  384. if '@' in account:
  385. if not account.startswith('inbox@'):
  386. lastUsedFilename=baseDir+'/accounts/'+account+'/.lastUsed'
  387. if os.path.isfile(lastUsedFilename):
  388. with open(lastUsedFilename, 'r') as lastUsedFile:
  389. lastUsed = lastUsedFile.read()
  390. if lastUsed.isdigit():
  391. timeDiff=(currTime-int(lastUsed))
  392. if timeDiff<monthSeconds:
  393. accountCtr+=1
  394. return accountCtr
  395. def isPublicPost(postJsonObject: {}) -> bool:
  396. """Returns true if the given post is public
  397. """
  398. if not postJsonObject.get('type'):
  399. return False
  400. if postJsonObject['type']!='Create':
  401. return False
  402. if not postJsonObject.get('object'):
  403. return False
  404. if not isinstance(postJsonObject['object'], dict):
  405. return False
  406. if not postJsonObject['object'].get('to'):
  407. return False
  408. for recipient in postJsonObject['object']['to']:
  409. if recipient.endswith('#Public'):
  410. return True
  411. return False
  412. def copytree(src: str, dst: str, symlinks=False, ignore=None):
  413. """Copy a directory
  414. """
  415. for item in os.listdir(src):
  416. s = os.path.join(src, item)
  417. d = os.path.join(dst, item)
  418. if os.path.isdir(s):
  419. shutil.copytree(s, d, symlinks, ignore)
  420. else:
  421. shutil.copy2(s, d)
  422. def getCachedPostDirectory(baseDir: str,nickname: str,domain: str) -> str:
  423. """Returns the directory where the html post cache exists
  424. """
  425. htmlPostCacheDir=baseDir+'/accounts/'+nickname+'@'+domain+'/postcache'
  426. return htmlPostCacheDir
  427. def getCachedPostFilename(baseDir: str,nickname: str,domain: str, \
  428. postJsonObject: {}) -> str:
  429. """Returns the html cache filename for the given post
  430. """
  431. cachedPostFilename= \
  432. getCachedPostDirectory(baseDir,nickname,domain)+ \
  433. '/'+postJsonObject['id'].replace('/activity','').replace('/','#')+'.html'
  434. return cachedPostFilename