update.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. #!/usr/bin/env python3
  2. import readline
  3. import hashlib
  4. import signal
  5. import json
  6. import glob
  7. import sys
  8. import os
  9. assert sys.version_info >= (3, 6), "Python version too old. Python >=3.6.0 needed."
  10. # subset of https://spdx.org/licenses/
  11. valid_licenses = ["", "CC0-1.0", "Unlicense", "CC-BY-3.0", "CC-BY-NC-SA-3.0", "CC-BY-SA-4.0", "CC-BY-SA-3.0", "GFDL-1.3-or-later", "LAL-1.3"]
  12. valid_languages = ["", "dutch", "english", "french", "german"]
  13. # Only works on *nix systems
  14. def rlinput(prompt, prefill=""):
  15. readline.set_startup_hook(lambda: readline.insert_text(prefill))
  16. try:
  17. return input(prompt)
  18. finally:
  19. readline.set_startup_hook()
  20. def check_duplicate_images():
  21. def hash_file(path):
  22. with open(path, "rb") as file:
  23. hasher = hashlib.sha1()
  24. hasher.update(file.read())
  25. return hasher.hexdigest()
  26. dups_found = False
  27. hashes = {}
  28. for entry in glob.glob("images/", recursive=True):
  29. if os.path.isfile(entry):
  30. hash = hash_file(entry)
  31. if hash in hashes:
  32. print("Warning: Files identical: '{}' and '{}'".format(entry, hashes[hash]))
  33. dups_found = True
  34. else:
  35. hashes[hash] = entry
  36. return dups_found
  37. def get_defaults_entry(db, prev, image):
  38. name, ext = os.path.splitext(image)
  39. common_prefix = ''
  40. common_key = ''
  41. for key in db:
  42. p = os.path.commonprefix([key, name])
  43. if len(p) > len(common_prefix):
  44. common_prefix = p
  45. common_key = key
  46. if len(common_prefix) > 4 and ((100 * len(common_prefix)) / len(name)) > 60:
  47. # common prefix is >60% of the image name length
  48. return db[common_key]
  49. else:
  50. # use previous image meta data as default
  51. return prev
  52. def is_valid_author(author):
  53. return True
  54. def is_valid_title(author):
  55. return True
  56. def is_valid_notes(notes):
  57. return True
  58. def is_valid_tags(tags):
  59. if tags.lower() != tags:
  60. print("Only lower case letters please.")
  61. return False
  62. return True
  63. def is_valid_license(licenses):
  64. for i, license in enumerate(licenses.split("/")):
  65. if i >= 2:
  66. print("Only two licenses allowed")
  67. return False
  68. if license not in valid_licenses:
  69. print("{} no in {}".format(license, valid_licenses))
  70. return False
  71. return True
  72. def is_valid_language(language):
  73. if language not in valid_languages:
  74. print("Valid languages: {}".format(valid_languages))
  75. return False
  76. return True
  77. def is_valid_link(link):
  78. if len(link) > 0:
  79. if not link.startswith("https://") and not link.startswith("http://"):
  80. print("Link must start with https://")
  81. return False
  82. return True
  83. def ask_value(prompt, is_valid, prefill=""):
  84. value = rlinput(prompt, prefill)
  85. while not is_valid(value):
  86. value = rlinput(prompt, value)
  87. return value.strip()
  88. # add or update image
  89. def handle_image(i, n, prev, db, image):
  90. print('#######################################')
  91. print('[{}/{}] "images/{}"'.format(i, n, image))
  92. print('#######################################')
  93. # get default values
  94. default = get_defaults_entry(db, prev[0], image)
  95. tags = default.get("tags", "")
  96. title = default.get("title", "")
  97. author = default.get("author", "")
  98. notes = default.get("notes", "")
  99. license = default.get("license", "")
  100. language = default.get("language", "")
  101. link = default.get("link", "")
  102. while True:
  103. tags = ask_value("Tags: ", is_valid_tags, tags)
  104. title = ask_value("Title: ", is_valid_title, title)
  105. author = ask_value("Author: ", is_valid_author, author)
  106. notes = ask_value("Notes: ", is_valid_notes, notes)
  107. license = ask_value("License: ", is_valid_license, license)
  108. language = ask_value("Language: ", is_valid_language, language)
  109. link = ask_value("Link: ", is_valid_link, link)
  110. answer = ask_value("next (1), again (2), skip (3), exit (4): ",
  111. lambda v: v in ["1", "2", "3", "4"], "1")
  112. if answer == "1":
  113. break
  114. if answer == "2":
  115. pass
  116. if answer == "3":
  117. return 0
  118. if answer == "4":
  119. return -1
  120. obj = {}
  121. if len(tags) > 0:
  122. obj["tags"] = tags
  123. if len(title) > 0:
  124. obj["title"] = title
  125. if len(language) > 0:
  126. obj["language"] = language
  127. if len(author) > 0:
  128. obj["author"] = author
  129. if len(notes) > 0:
  130. obj["notes"] = notes
  131. if len(license) > 0:
  132. obj["license"] = license
  133. if len(link) > 0:
  134. obj["link"] = link
  135. db[image] = obj
  136. prev[0] = obj
  137. print("done")
  138. return 1
  139. def add_previews(db):
  140. def find_images_paths(name):
  141. images = []
  142. for entry in glob.glob("images/{}/*".format(name), recursive=True):
  143. if not os.path.isfile(entry):
  144. continue
  145. if entry.endswith(".pdf") or entry.endswith(".png") or entry.endswith(".jpg") or entry.endswith(".svg"):
  146. images.append(entry)
  147. return images
  148. for name in db:
  149. if not os.path.isfile("images/{}/preview.webp".format(name)):
  150. image_paths = find_images_paths(name)
  151. print("Create preview image: 'images/{}/preview.webp'".format(name))
  152. done = False
  153. for path in image_paths:
  154. rc = os.system("convert -resize 300 '{}' 'images/{}/preview.webp'".format(path, name))
  155. if rc == 0:
  156. done = True
  157. break
  158. if not done:
  159. if len(image_paths) == 0:
  160. print("No image found for images/{}/preview.webp".format(name))
  161. else:
  162. print("Failed to create preview for images/{}/preview.webp".format(name))
  163. def update_file_listings(path, create_index=False):
  164. entries = []
  165. for entry in glob.glob("{}/*".format(path)):
  166. if not entry.endswith("/index.html"):
  167. entries.append(entry)
  168. if create_index:
  169. with open("{}/index.html".format(path), "w") as file:
  170. file.write("<html>\n <head>\n")
  171. file.write(" <title>Directory listing for {}</title>\n".format(path))
  172. file.write(" <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n")
  173. file.write(" </head>\n <body>\n")
  174. file.write(" <h1>Directory listing for {}</h1>\n".format(path))
  175. file.write(" <hr>\n <ul>\n")
  176. for entry in entries:
  177. name = os.path.basename(entry)
  178. if name != "preview.webp":
  179. file.write(" <li><a href=\"{}\">{}</a></li>\n".format(name, name))
  180. file.write(" </ul>\n </body>\n</html>\n")
  181. for entry in entries:
  182. if os.path.isdir(entry):
  183. update_file_listings(entry, True)
  184. def save_database(db, new_image_count):
  185. # write anyway, this will format manual edits to data.json
  186. with open("data.json", "w") as outfile:
  187. json.dump(db, outfile, indent=" ", sort_keys=True)
  188. print("Wrote {} new entries to data.json => done".format(new_image_count))
  189. def main():
  190. def get_database():
  191. with open("data.json") as file:
  192. return json.load(file)
  193. def get_image_set():
  194. images = set()
  195. for image in os.listdir("images/"):
  196. if os.path.isdir("images/{}".format(image)):
  197. images.add(image)
  198. return images
  199. db = get_database()
  200. db_images = db.keys()
  201. images = []
  202. if len(sys.argv) > 1:
  203. for image in sys.argv[1:]:
  204. if not image.startswith("images/"):
  205. print("Outside images folder: {}".format(image))
  206. sys.exit(1)
  207. elif os.path.isdir(image):
  208. images.append(os.path.basename(image))
  209. else:
  210. print("folder {} does not exist".format(image))
  211. sys.exit(1)
  212. else:
  213. images = list(get_image_set() - set(db_images))
  214. images.sort()
  215. if check_duplicate_images():
  216. print("Please remove duplicate files first!")
  217. return
  218. old_image_count = len(db_images)
  219. new_image_count = 0
  220. def sigint_handler():
  221. if new_image_count > 0:
  222. print("\nNothing saved")
  223. print("")
  224. sys.exit(0)
  225. if len(images) > 0:
  226. # Exit Ctrl+C gracefully
  227. signal.signal(signal.SIGINT, lambda sig, frame: sigint_handler())
  228. answer = input("Start to add {} new image folders [Y, n]? ".format(len(images)))
  229. if answer == "n":
  230. return
  231. prev = [{}] # use list for pass by reference
  232. for i, image in enumerate(images):
  233. ret = handle_image(i + 1, len(images), prev, db, image)
  234. if ret > 0:
  235. new_image_count += 1
  236. if ret < 0:
  237. break
  238. add_previews(db)
  239. update_file_listings("images")
  240. save_database(db, new_image_count)
  241. if __name__ == "__main__":
  242. main()