utils.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554
  1. __filename__ = "utils.py"
  2. __author__ = "Bob Mottram"
  3. __license__ = "AGPL3+"
  4. __version__ = "1.1.0"
  5. __maintainer__ = "Bob Mottram"
  6. __email__ = "bob@freedombone.net"
  7. __status__ = "Production"
  8. import os
  9. import time
  10. import shutil
  11. import datetime
  12. import json
  13. def saveJson(jsonObject: {},filename: str) -> bool:
  14. """Saves json to a file
  15. """
  16. tries=0
  17. while tries<5:
  18. try:
  19. with open(filename, 'w') as fp:
  20. fp.write(json.dumps(jsonObject))
  21. return True
  22. except:
  23. print('WARN: saveJson '+str(tries))
  24. time.sleep(1)
  25. tries+=1
  26. return False
  27. def loadJson(filename: str,delaySec=2) -> {}:
  28. """Makes a few attempts to load a json formatted file
  29. """
  30. jsonObject=None
  31. tries=0
  32. while tries<5:
  33. try:
  34. with open(filename, 'r') as fp:
  35. data=fp.read()
  36. jsonObject=json.loads(data)
  37. break
  38. except:
  39. print('WARN: loadJson exception')
  40. if delaySec>0:
  41. time.sleep(delaySec)
  42. tries+=1
  43. return jsonObject
  44. def getStatusNumber() -> (str,str):
  45. """Returns the status number and published date
  46. """
  47. currTime=datetime.datetime.utcnow()
  48. daysSinceEpoch=(currTime - datetime.datetime(1970,1,1)).days
  49. # status is the number of seconds since epoch
  50. statusNumber=str(((daysSinceEpoch*24*60*60) + (currTime.hour*60*60) + (currTime.minute*60) + currTime.second)*1000 + int(currTime.microsecond/1000))
  51. # See https://github.com/tootsuite/mastodon/blob/995f8b389a66ab76ec92d9a240de376f1fc13a38/lib/mastodon/snowflake.rb
  52. # use the leftover microseconds as the sequence number
  53. sequenceId=currTime.microsecond % 1000
  54. # shift by 16bits "sequence data"
  55. statusNumber=str((int(statusNumber)<<16)+sequenceId)
  56. published=currTime.strftime("%Y-%m-%dT%H:%M:%SZ")
  57. return statusNumber,published
  58. def isEvil(domain: str) -> bool:
  59. if not isinstance(domain, str):
  60. print('WARN: Malformed domain '+str(domain))
  61. return True
  62. # https://www.youtube.com/watch?v=5qw1hcevmdU
  63. evilDomains=('gab.com','gabfed.com','spinster.xyz','kiwifarms.cc','djitter.com')
  64. for concentratedEvil in evilDomains:
  65. if domain.endswith(concentratedEvil):
  66. return True
  67. return False
  68. def createPersonDir(nickname: str,domain: str,baseDir: str,dirname: str) -> str:
  69. """Create a directory for a person
  70. """
  71. handle=nickname+'@'+domain
  72. if not os.path.isdir(baseDir+'/accounts/'+handle):
  73. os.mkdir(baseDir+'/accounts/'+handle)
  74. boxDir=baseDir+'/accounts/'+handle+'/'+dirname
  75. if not os.path.isdir(boxDir):
  76. os.mkdir(boxDir)
  77. return boxDir
  78. def createOutboxDir(nickname: str,domain: str,baseDir: str) -> str:
  79. """Create an outbox for a person
  80. """
  81. return createPersonDir(nickname,domain,baseDir,'outbox')
  82. def createInboxQueueDir(nickname: str,domain: str,baseDir: str) -> str:
  83. """Create an inbox queue and returns the feed filename and directory
  84. """
  85. return createPersonDir(nickname,domain,baseDir,'queue')
  86. def domainPermitted(domain: str, federationList: []):
  87. if len(federationList)==0:
  88. return True
  89. if ':' in domain:
  90. domain=domain.split(':')[0]
  91. if domain in federationList:
  92. return True
  93. return False
  94. def urlPermitted(url: str,federationList: [],capability: str):
  95. if isEvil(url):
  96. return False
  97. if not federationList:
  98. return True
  99. for domain in federationList:
  100. if domain in url:
  101. return True
  102. return False
  103. def getDisplayName(baseDir: str,actor: str,personCache: {}) -> str:
  104. """Returns the display name for the given actor
  105. """
  106. if '/statuses/' in actor:
  107. actor=actor.split('/statuses/')[0]
  108. if not personCache.get(actor):
  109. return None
  110. if personCache[actor].get('actor'):
  111. if personCache[actor]['actor'].get('name'):
  112. return personCache[actor]['actor']['name']
  113. else:
  114. # Try to obtain from the cached actors
  115. cachedActorFilename=baseDir+'/cache/actors/'+(actor.replace('/','#'))+'.json'
  116. if os.path.isfile(cachedActorFilename):
  117. actorJson=loadJson(cachedActorFilename,1)
  118. if actorJson:
  119. if actorJson.get('name'):
  120. return(actorJson['name'])
  121. return None
  122. def getNicknameFromActor(actor: str) -> str:
  123. """Returns the nickname from an actor url
  124. """
  125. if '/users/' not in actor:
  126. if '/profile/' in actor:
  127. nickStr=actor.split('/profile/')[1].replace('@','')
  128. if '/' not in nickStr:
  129. return nickStr
  130. else:
  131. return nickStr.split('/')[0]
  132. if '/channel/' in actor:
  133. nickStr=actor.split('/channel/')[1].replace('@','')
  134. if '/' not in nickStr:
  135. return nickStr
  136. else:
  137. return nickStr.split('/')[0]
  138. # https://domain/@nick
  139. if '/@' in actor:
  140. nickStr=actor.split('/@')[1]
  141. if '/' in nickStr:
  142. nickStr=nickStr.split('/')[0]
  143. return nickStr
  144. return None
  145. nickStr=actor.split('/users/')[1].replace('@','')
  146. if '/' not in nickStr:
  147. return nickStr
  148. else:
  149. return nickStr.split('/')[0]
  150. def getDomainFromActor(actor: str) -> (str,int):
  151. """Returns the domain name from an actor url
  152. """
  153. port=None
  154. if '/profile/' in actor:
  155. domain = actor.split('/profile/')[0].replace('https://','').replace('http://','').replace('dat://','')
  156. else:
  157. if '/channel/' in actor:
  158. domain = actor.split('/channel/')[0].replace('https://','').replace('http://','').replace('dat://','')
  159. else:
  160. if '/users/' not in actor:
  161. domain = actor.replace('https://','').replace('http://','').replace('dat://','')
  162. if '/' in actor:
  163. domain=domain.split('/')[0]
  164. else:
  165. domain = actor.split('/users/')[0].replace('https://','').replace('http://','').replace('dat://','')
  166. if ':' in domain:
  167. port=int(domain.split(':')[1])
  168. domain=domain.split(':')[0]
  169. return domain,port
  170. def followPerson(baseDir: str,nickname: str, domain: str, \
  171. followNickname: str, followDomain: str, \
  172. federationList: [],debug: bool, \
  173. followFile='following.txt') -> bool:
  174. """Adds a person to the follow list
  175. """
  176. if not domainPermitted(followDomain.lower().replace('\n',''), \
  177. federationList):
  178. if debug:
  179. print('DEBUG: follow of domain '+followDomain+' not permitted')
  180. return False
  181. if debug:
  182. print('DEBUG: follow of domain '+followDomain)
  183. if ':' in domain:
  184. handle=nickname+'@'+domain.split(':')[0].lower()
  185. else:
  186. handle=nickname+'@'+domain.lower()
  187. if ':' in followDomain:
  188. handleToFollow=followNickname+'@'+followDomain.split(':')[0].lower()
  189. else:
  190. handleToFollow=followNickname+'@'+followDomain.lower()
  191. if not os.path.isdir(baseDir+'/accounts'):
  192. os.mkdir(baseDir+'/accounts')
  193. if not os.path.isdir(baseDir+'/accounts/'+handle):
  194. os.mkdir(baseDir+'/accounts/'+handle)
  195. filename=baseDir+'/accounts/'+handle+'/'+followFile
  196. if os.path.isfile(filename):
  197. if handleToFollow in open(filename).read():
  198. if debug:
  199. print('DEBUG: follow already exists')
  200. return True
  201. # prepend to follow file
  202. try:
  203. with open(filename, 'r+') as followFile:
  204. content = followFile.read()
  205. followFile.seek(0, 0)
  206. followFile.write(followNickname+'@'+followDomain+'\n'+content)
  207. if debug:
  208. print('DEBUG: follow added')
  209. return True
  210. except Exception as e:
  211. print('WARN: Failed to write entry to follow file '+filename+' '+str(e))
  212. if debug:
  213. print('DEBUG: creating new following file')
  214. with open(filename, "w") as followfile:
  215. followfile.write(followNickname+'@'+followDomain+'\n')
  216. return True
  217. def locatePost(baseDir: str,nickname: str,domain: str,postUrl: str,replies=False) -> str:
  218. """Returns the filename for the given status post url
  219. """
  220. if not replies:
  221. extension='json'
  222. else:
  223. extension='replies'
  224. # if this post in the shared inbox?
  225. handle='inbox@'+domain
  226. postUrl=postUrl.replace('/','#').replace('/activity','').strip()
  227. boxName='inbox'
  228. postFilename=baseDir+'/accounts/'+nickname+'@'+domain+'/'+boxName+'/'+postUrl+'.'+extension
  229. if os.path.isfile(postFilename):
  230. return postFilename
  231. boxName='outbox'
  232. postFilename=baseDir+'/accounts/'+nickname+'@'+domain+'/'+boxName+'/'+postUrl+'.'+extension
  233. if os.path.isfile(postFilename):
  234. return postFilename
  235. # if this post in the inbox of the person?
  236. boxName='inbox'
  237. postFilename=baseDir+'/accounts/'+nickname+'@'+domain+'/'+boxName+'/'+postUrl+'.'+extension
  238. if os.path.isfile(postFilename):
  239. return postFilename
  240. postFilename=baseDir+'/cache/announce/'+nickname+'/'+postUrl+'.'+extension
  241. if os.path.isfile(postFilename):
  242. return postFilename
  243. print('WARN: unable to locate '+nickname+' '+postUrl+'.'+extension)
  244. return None
  245. def removeAttachment(baseDir: str,httpPrefix: str,domain: str,postJson: {}):
  246. if not postJson.get('attachment'):
  247. return
  248. if not postJson['attachment'][0].get('url'):
  249. return
  250. if port:
  251. if port!=80 and port!=443:
  252. if ':' not in domain:
  253. domain=domain+':'+str(port)
  254. attachmentUrl=postJson['attachment'][0]['url']
  255. if not attachmentUrl:
  256. return
  257. mediaFilename=baseDir+'/'+attachmentUrl.replace(httpPrefix+'://'+domain+'/','')
  258. if os.path.isfile(mediaFilename):
  259. os.remove(mediaFilename)
  260. etagFilename=mediaFilename+'.etag'
  261. if os.path.isfile(etagFilename):
  262. os.remove(etagFilename)
  263. postJson['attachment']=[]
  264. def removeModerationPostFromIndex(baseDir: str,postUrl: str,debug: bool) -> None:
  265. """Removes a url from the moderation index
  266. """
  267. moderationIndexFile=baseDir+'/accounts/moderation.txt'
  268. if not os.path.isfile(moderationIndexFile):
  269. return
  270. postId=postUrl.replace('/activity','')
  271. if postId in open(moderationIndexFile).read():
  272. with open(moderationIndexFile, "r") as f:
  273. lines = f.readlines()
  274. with open(moderationIndexFile, "w+") as f:
  275. for line in lines:
  276. if line.strip("\n") != postId:
  277. f.write(line)
  278. else:
  279. if debug:
  280. print('DEBUG: removed '+postId+' from moderation index')
def deletePost(baseDir: str,httpPrefix: str,nickname: str,domain: str,postFilename: str,debug: bool) -> None:
    """Recursively deletes a post and its replies and attachments

    If the post file can be loaded as json then, in this order: deletion
    is abandoned when the post filename is listed in the bookmarks index
    of nickname@domain, the attachment is removed, the '.muted' file and
    the cached html are removed, the post is removed from the moderation
    index, and its id is removed from the index file of each hashtag.
    After that every post listed in the '.replies' file is deleted
    through a recursive call, and finally the post file itself is removed.

    NOTE(review): the nesting below was reconstructed from a copy of this
    file which had lost its indentation. In particular the removal of
    replies and of the post file is assumed to happen even when the post
    json could not be loaded - confirm against the upstream source.
    """
    postJsonObject=loadJson(postFilename,1)
    if postJsonObject:
        # don't allow deletion of bookmarked posts
        bookmarksIndexFilename=baseDir+'/accounts/'+nickname+'@'+domain+'/bookmarks.index'
        if os.path.isfile(bookmarksIndexFilename):
            # index entries are matched as the bare filename followed by a newline
            bookmarkIndex=postFilename.split('/')[-1]+'\n'
            if bookmarkIndex in open(bookmarksIndexFilename).read():
                # nothing at all is deleted for a bookmarked post
                return
        # remove any attachment
        removeAttachment(baseDir,httpPrefix,domain,postJsonObject)
        # remove any mute file
        muteFilename=postFilename+'.muted'
        if os.path.isfile(muteFilename):
            os.remove(muteFilename)
        # remove cached html version of the post
        cachedPostFilename= \
            getCachedPostFilename(baseDir,nickname,domain,postJsonObject)
        if cachedPostFilename:
            if os.path.isfile(cachedPostFilename):
                os.remove(cachedPostFilename)
        #removePostFromCache(postJsonObject,recentPostsCache)
        hasObject=False
        if postJsonObject.get('object'):
            hasObject=True
        # remove from moderation index file
        if hasObject:
            # 'object' may be something other than a dict, in which case
            # there is no moderation status to look at
            if isinstance(postJsonObject['object'], dict):
                if postJsonObject['object'].get('moderationStatus'):
                    if postJsonObject.get('id'):
                        postId=postJsonObject['id'].replace('/activity','')
                        removeModerationPostFromIndex(baseDir,postId,debug)
        # remove any hashtags index entries
        removeHashtagIndex=False
        if hasObject:
            if hasObject and isinstance(postJsonObject['object'], dict):
                if postJsonObject['object'].get('content'):
                    # only look at the tags if the content contains a '#'
                    if '#' in postJsonObject['object']['content']:
                        removeHashtagIndex=True
        if removeHashtagIndex:
            if postJsonObject['object'].get('id') and postJsonObject['object'].get('tag'):
                # get the id of the post
                postId=postJsonObject['object']['id'].replace('/activity','')
                for tag in postJsonObject['object']['tag']:
                    if tag['type']!='Hashtag':
                        continue
                    if not tag.get('name'):
                        continue
                    # find the index file for this tag
                    # (the first character of the tag name is skipped)
                    tagIndexFilename=baseDir+'/tags/'+tag['name'][1:]+'.txt'
                    if not os.path.isfile(tagIndexFilename):
                        continue
                    # remove postId from the tag index file
                    lines=None
                    with open(tagIndexFilename, "r") as f:
                        lines=f.readlines()
                    if lines:
                        # keep every line which does not contain the post id
                        newlines=''
                        for l in lines:
                            if postId in l:
                                continue
                            newlines+=l
                        if not newlines.strip():
                            # if there are no lines then remove the hashtag file
                            os.remove(tagIndexFilename)
                        else:
                            with open(tagIndexFilename, "w+") as f:
                                f.write(newlines)
    # remove any replies
    repliesFilename=postFilename.replace('.json','.replies')
    if os.path.isfile(repliesFilename):
        if debug:
            print('DEBUG: removing replies to '+postFilename)
        with open(repliesFilename,'r') as f:
            # each line of the replies file is passed to locatePost as a post url
            for replyId in f:
                replyFile=locatePost(baseDir,nickname,domain,replyId)
                if replyFile:
                    if os.path.isfile(replyFile):
                        deletePost(baseDir,httpPrefix,nickname,domain,replyFile,debug)
        # remove the replies file
        os.remove(repliesFilename)
    # finally, remove the post itself
    os.remove(postFilename)
  366. def validNickname(domain: str,nickname: str) -> bool:
  367. forbiddenChars=['.',' ','/','?',':',';','@']
  368. for c in forbiddenChars:
  369. if c in nickname:
  370. return False
  371. if nickname==domain:
  372. return False
  373. reservedNames=['inbox','dm','outbox','following','public','followers','profile','channel','capabilities','calendar','tlreplies','tlmedia','moderation','activity','undo','reply','replies','question','like','likes','users','statuses','updates','repeat','announce','shares']
  374. if nickname in reservedNames:
  375. return False
  376. return True
  377. def noOfAccounts(baseDir: str) -> bool:
  378. """Returns the number of accounts on the system
  379. """
  380. accountCtr=0
  381. for subdir, dirs, files in os.walk(baseDir+'/accounts'):
  382. for account in dirs:
  383. if '@' in account:
  384. if not account.startswith('inbox@'):
  385. accountCtr+=1
  386. return accountCtr
  387. def noOfActiveAccountsMonthly(baseDir: str,months: int) -> bool:
  388. """Returns the number of accounts on the system this month
  389. """
  390. accountCtr=0
  391. currTime=int(time.time())
  392. monthSeconds=int(60*60*24*30*months)
  393. for subdir, dirs, files in os.walk(baseDir+'/accounts'):
  394. for account in dirs:
  395. if '@' in account:
  396. if not account.startswith('inbox@'):
  397. lastUsedFilename=baseDir+'/accounts/'+account+'/.lastUsed'
  398. if os.path.isfile(lastUsedFilename):
  399. with open(lastUsedFilename, 'r') as lastUsedFile:
  400. lastUsed = lastUsedFile.read()
  401. if lastUsed.isdigit():
  402. timeDiff=(currTime-int(lastUsed))
  403. if timeDiff<monthSeconds:
  404. accountCtr+=1
  405. return accountCtr
  406. def isPublicPostFromUrl(baseDir: str,nickname: str,domain: str,postUrl: str) -> bool:
  407. """Returns whether the given url is a public post
  408. """
  409. postFilename=locatePost(baseDir,nickname,domain,postUrl)
  410. if not postFilename:
  411. return False
  412. postJsonObject=loadJson(postFilename,1)
  413. if not postJsonObject:
  414. return False
  415. return isPublicPost(postJsonObject)
  416. def isPublicPost(postJsonObject: {}) -> bool:
  417. """Returns true if the given post is public
  418. """
  419. if not postJsonObject.get('type'):
  420. return False
  421. if postJsonObject['type']!='Create':
  422. return False
  423. if not postJsonObject.get('object'):
  424. return False
  425. if not isinstance(postJsonObject['object'], dict):
  426. return False
  427. if not postJsonObject['object'].get('to'):
  428. return False
  429. for recipient in postJsonObject['object']['to']:
  430. if recipient.endswith('#Public'):
  431. return True
  432. return False
  433. def copytree(src: str, dst: str, symlinks=False, ignore=None):
  434. """Copy a directory
  435. """
  436. for item in os.listdir(src):
  437. s = os.path.join(src, item)
  438. d = os.path.join(dst, item)
  439. if os.path.isdir(s):
  440. shutil.copytree(s, d, symlinks, ignore)
  441. else:
  442. shutil.copy2(s, d)
  443. def getCachedPostDirectory(baseDir: str,nickname: str,domain: str) -> str:
  444. """Returns the directory where the html post cache exists
  445. """
  446. htmlPostCacheDir=baseDir+'/accounts/'+nickname+'@'+domain+'/postcache'
  447. return htmlPostCacheDir
  448. def getCachedPostFilename(baseDir: str,nickname: str,domain: str, \
  449. postJsonObject: {}) -> str:
  450. """Returns the html cache filename for the given post
  451. """
  452. cachedPostDir=getCachedPostDirectory(baseDir,nickname,domain)
  453. if not os.path.isdir(cachedPostDir):
  454. #print('ERROR: invalid html cache directory '+cachedPostDir)
  455. return None
  456. if '@' not in cachedPostDir:
  457. #print('ERROR: invalid html cache directory '+cachedPostDir)
  458. return None
  459. cachedPostFilename= \
  460. cachedPostDir+ \
  461. '/'+postJsonObject['id'].replace('/activity','').replace('/','#')
  462. cachedPostFilename=cachedPostFilename+'.html'
  463. return cachedPostFilename
  464. def removePostFromCache(postJsonObject: {},recentPostsCache: {}):
  465. """ if the post exists in the recent posts cache then remove it
  466. """
  467. if not postJsonObject.get('id'):
  468. return
  469. if not recentPostsCache.get('index'):
  470. return
  471. postId=postJsonObject['id']
  472. if '#' in postId:
  473. postId=postId.split('#',1)[0]
  474. postId=postId.replace('/activity','').replace('/','#')
  475. if postId not in recentPostsCache['index']:
  476. return
  477. if recentPostsCache['json'].get(postId):
  478. del recentPostsCache['json'][postId]
  479. if recentPostsCache['html'].get(postId):
  480. del recentPostsCache['html'][postId]
  481. recentPostsCache['index'].remove(postId)
  482. def updateRecentPostsCache(recentPostsCache: {},maxRecentPosts: int, \
  483. postJsonObject: {},htmlStr: str) -> None:
  484. """Store recent posts in memory so that they can be quickly recalled
  485. """
  486. if not postJsonObject.get('id'):
  487. return
  488. postId=postJsonObject['id']
  489. if '#' in postId:
  490. postId=postId.split('#',1)[0]
  491. postId=postId.replace('/activity','').replace('/','#')
  492. if recentPostsCache.get('index'):
  493. if postId in recentPostsCache['index']:
  494. return
  495. recentPostsCache['index'].append(postId)
  496. postJsonObject['muted']=False
  497. recentPostsCache['json'][postId]=json.dumps(postJsonObject)
  498. recentPostsCache['html'][postId]=htmlStr
  499. while len(recentPostsCache['html'].items())>maxRecentPosts:
  500. recentPostsCache['index'].pop(0)
  501. del recentPostsCache['json'][postId]
  502. del recentPostsCache['html'][postId]
  503. else:
  504. recentPostsCache['index']=[postId]
  505. recentPostsCache['json']={}
  506. recentPostsCache['html']={}
  507. recentPostsCache['json'][postId]=json.dumps(postJsonObject)
  508. recentPostsCache['html'][postId]=htmlStr