summaryrefslogtreecommitdiff
path: root/bin/importer.py
diff options
context:
space:
mode:
Diffstat (limited to 'bin/importer.py')
-rw-r--r--bin/importer.py351
1 files changed, 351 insertions, 0 deletions
diff --git a/bin/importer.py b/bin/importer.py
new file mode 100644
index 0000000..592b84d
--- /dev/null
+++ b/bin/importer.py
@@ -0,0 +1,351 @@
+#!/usr/bin/env python3
+# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
+
+# Copyright 2014-2019 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
+# Copyright 2014-2018 Claude (longneck) <longneck@scratchbook.ch>
+
+# This file is part of qutebrowser.
+#
+# qutebrowser is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# qutebrowser is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
+
+
+"""Tool to import data from other browsers.
+
+Importing bookmarks is currently supported from Netscape bookmark files,
+Mozilla profiles and Chrome-type profiles.
+"""
+
+
+import argparse
+import sqlite3
+import os
+import urllib.parse
+import json
+import string
+
# Input format assumed for each supported browser when -i/--input-format is
# not given on the command line.
browser_default_input_format = dict(
    chromium='chrome',
    chrome='chrome',
    ie='netscape',
    firefox='mozilla',
    seamonkey='mozilla',
    palemoon='mozilla',
)
+
+
def main():
    """Parse the command line and run the matching importer.

    The output format and the kinds of entries to import are derived from the
    command line flags, the input format from -i or the chosen browser
    (falling back to 'netscape'), and the work is then handed to the importer
    registered for that input format.
    """
    args = get_args()

    if args.search_output:
        # Search engine output only deals with search keywords, so -B and -K
        # are not consulted here.
        bookmark_types = ['search']
        output_format = 'oldsearch' if args.oldconfig else 'search'
    else:
        # Quickmark output is the default when -b is not given.
        output_format = 'bookmark' if args.bookmark_output else 'quickmark'
        requested = (
            (args.import_bookmarks, 'bookmark'),
            (args.import_keywords, 'keyword'),
        )
        bookmark_types = [typ for wanted, typ in requested if wanted]
        if not bookmark_types:
            # Neither -B nor -K given: import both kinds.
            bookmark_types = ['bookmark', 'keyword']

    input_format = args.input_format
    if not input_format:
        # An explicit -i wins; otherwise use the browser's default format,
        # and netscape when no browser was named either.
        input_format = (browser_default_input_format[args.browser]
                        if args.browser else 'netscape')

    importers = {
        'netscape': import_netscape_bookmarks,
        'mozilla': import_moz_places,
        'chrome': import_chrome,
    }
    run_import = importers[input_format]
    run_import(args.bookmarks, bookmark_types, output_format)
+
+
def get_args():
    """Build the command line parser and return the parsed arguments.

    Returns:
        The argparse namespace with the attributes browser, input_format,
        bookmark_output, quickmark_output, search_output, oldconfig,
        import_bookmarks, import_keywords and bookmarks.
    """
    parser = argparse.ArgumentParser(
        epilog="To import bookmarks from Chromium, Firefox or IE, "
               "export them to HTML in your browser's bookmark manager. ")
    parser.add_argument(
        'browser',
        help="Which browser? {%(choices)s}",
        choices=browser_default_input_format.keys(),
        nargs='?',
        metavar='browser')
    parser.add_argument(
        '-i',
        '--input-format',
        help='Which input format? (overrides browser default; "netscape" if '
             'neither given)',
        # Sorted so that help and error messages list the formats in a stable
        # order; iterating a plain set of strings varies between runs.
        choices=sorted(set(browser_default_input_format.values())))
    parser.add_argument(
        '-b',
        '--bookmark-output',
        help="Output in bookmark format.",
        action='store_true')
    parser.add_argument(
        '-q',
        '--quickmark-output',
        help="Output in quickmark format (default).",
        action='store_true')
    parser.add_argument(
        '-s',
        '--search-output',
        help="Output config.py search engine format (negates -B and -K)",
        action='store_true')
    parser.add_argument(
        '--oldconfig',
        help="Output search engine format for old qutebrowser.conf format",
        action='store_true')
    parser.add_argument(
        '-B',
        '--import-bookmarks',
        help="Import plain bookmarks (can be combined with -K)",
        action='store_true')
    parser.add_argument(
        '-K',
        '--import-keywords',
        help="Import keywords (can be combined with -B)",
        action='store_true')
    parser.add_argument(
        'bookmarks',
        help="Bookmarks file (html format) or "
             "profile folder (Mozilla format)")
    return parser.parse_args()
+
+
def search_escape(url):
    """Double every { and } in url so str.format treats them as literals.

    A URL that already uses qutebrowser's {} placeholders is mangled by this,
    since those braces get doubled as well.
    """
    doubled_braces = str.maketrans({'{': '{{', '}': '}}'})
    return url.translate(doubled_braces)
+
+
def opensearch_convert(url):
    """Turn a basic OpenSearch URL template into a qutebrowser search URL.

    Optional parameters (those ending in '?') are dropped, the known
    parameters are filled in, and the search terms end up as '{}'.

    Exceptions:
        KeyError:
            The URL contains a required parameter that is not known here.
            This usually means browser/addon specific functionality would be
            needed to build the URL, which is not available.
    """
    known_params = {
        'searchTerms': '%s',  # stand-in that survives the brace escaping
        'language': '*',
        'inputEncoding': 'UTF-8',
        'outputEncoding': 'UTF-8',
    }

    # Strip optional parameters, including ones we would not support anyway.
    for _literal, field, spec, _conversion in string.Formatter().parse(url):
        if not field:
            continue
        if field.endswith('?'):
            optional = '{' + field + '}'
        elif spec and spec.endswith('?'):
            optional = '{' + field + ':' + spec + '}'
        else:
            continue
        url = url.replace(optional, '')

    filled = url.format(**known_params)
    return search_escape(filled).replace('%s', '{}')
+
+
def import_netscape_bookmarks(bookmarks_file, bookmark_types, output_format):
    """Import bookmarks from a NETSCAPE-Bookmark-file v1 and print them.

    Generated by Chromium, Firefox, IE and possibly more browsers. Not all
    export all possible bookmark types:
        - Firefox mostly works with everything
        - Chrome doesn't support keywords at all; searches are a separate
          database

    Args:
        bookmarks_file: Path to the exported HTML bookmarks file (UTF-8).
        bookmark_types: Kinds of entries to import, out of 'bookmark',
                        'keyword' and 'search'.
        output_format: One of 'search', 'oldsearch', 'bookmark' and
                       'quickmark'.

    Each URL is printed at most once; later entries pointing to a URL which
    was already emitted are skipped.
    """
    import bs4
    with open(bookmarks_file, encoding='utf-8') as f:
        soup = bs4.BeautifulSoup(f, 'html.parser')

    def is_link(tag):
        """Check for an <a> tag which actually carries a URL."""
        return tag.name == 'a' and 'href' in tag.attrs

    bookmark_query = {
        # Keyword with a %s placeholder, i.e. a search engine.
        'search': lambda tag: (
            is_link(tag) and
            'shortcuturl' in tag.attrs and
            '%s' in tag['href']),
        # Keyword without a placeholder.
        'keyword': lambda tag: (
            is_link(tag) and
            'shortcuturl' in tag.attrs and
            '%s' not in tag['href']),
        # Plain bookmark with a title and no keyword.
        'bookmark': lambda tag: (
            is_link(tag) and
            'shortcuturl' not in tag.attrs and
            bool(tag.string)),
    }
    output_template = {
        'search': {
            'search':
            "c.url.searchengines['{tag[shortcuturl]}'] = "
            "'{tag[href]}' #{tag.string}"
        },
        'oldsearch': {
            'search': '{tag[shortcuturl]} = {tag[href]} #{tag.string}',
        },
        'bookmark': {
            'bookmark': '{tag[href]} {tag.string}',
            'keyword': '{tag[href]} {tag.string}'
        },
        'quickmark': {
            'bookmark': '{tag.string} {tag[href]}',
            'keyword': '{tag[shortcuturl]} {tag[href]}'
        }
    }

    # URLs are tracked separately from the rendered lines: comparing a URL
    # against the formatted output would never find a duplicate.
    seen_urls = set()
    bookmarks = []
    for typ in bookmark_types:
        for tag in soup.find_all(bookmark_query[typ]):
            if typ == 'search':
                tag['href'] = search_escape(tag['href']).replace('%s', '{}')
            href = tag['href']
            if href in seen_urls:
                continue
            seen_urls.add(href)
            bookmarks.append(
                output_template[output_format][typ].format(tag=tag))
    for bookmark in bookmarks:
        print(bookmark)
+
+
def import_moz_places(profile, bookmark_types, output_format):
    """Import bookmarks from a Mozilla profile's places.sqlite database.

    Args:
        profile: Path to the profile folder containing places.sqlite.
        bookmark_types: Kinds of entries to import, out of 'bookmark',
                        'keyword' and 'search'.
        output_format: One of 'bookmark', 'quickmark', 'oldsearch' and
                       'search'.

    One line per matching row is printed to stdout. The database connection
    is closed again afterwards, even if a query or the formatting fails.
    """
    place_query = {
        'bookmark': (
            "SELECT DISTINCT moz_bookmarks.title,moz_places.url "
            "FROM moz_bookmarks,moz_places "
            "WHERE moz_places.id=moz_bookmarks.fk "
            "AND moz_places.id NOT IN (SELECT place_id FROM moz_keywords) "
            "AND moz_places.url NOT LIKE 'place:%';"
        ),  # Bookmarks with no keywords assigned
        'keyword': (
            "SELECT moz_keywords.keyword,moz_places.url "
            "FROM moz_keywords,moz_places,moz_bookmarks "
            "WHERE moz_places.id=moz_bookmarks.fk "
            "AND moz_places.id=moz_keywords.place_id "
            "AND moz_places.url NOT LIKE '%!%s%' ESCAPE '!';"
        ),  # Bookmarks with keywords assigned but no %s substitution
        'search': (
            "SELECT moz_keywords.keyword, "
            "    moz_bookmarks.title, "
            "    search_conv(moz_places.url) AS url "
            "FROM moz_keywords,moz_places,moz_bookmarks "
            "WHERE moz_places.id=moz_bookmarks.fk "
            "AND moz_places.id=moz_keywords.place_id "
            "AND moz_places.url LIKE '%!%s%' ESCAPE '!';"
        )  # bookmarks with keyword and %s substitution
    }
    out_template = {
        'bookmark': {
            'bookmark': '{url} {title}',
            'keyword': '{url} {keyword}'
        },
        'quickmark': {
            'bookmark': '{title} {url}',
            'keyword': '{keyword} {url}'
        },
        'oldsearch': {
            'search': '{keyword} {url} #{title}'
        },
        'search': {
            'search': "c.url.searchengines['{keyword}'] = '{url}' #{title}"
        }
    }

    def search_conv(url):
        """Escape literal braces and turn %s into a {} placeholder."""
        return search_escape(url).replace('%s', '{}')

    places = sqlite3.connect(os.path.join(profile, "places.sqlite"))
    try:
        # Exposed to SQL so the 'search' query can convert URLs itself.
        places.create_function('search_conv', 1, search_conv)
        # Row objects let the templates refer to columns by name.
        places.row_factory = sqlite3.Row
        c = places.cursor()
        for typ in bookmark_types:
            c.execute(place_query[typ])
            for row in c:
                print(out_template[output_format][typ].format(**row))
    finally:
        places.close()
+
+
def import_chrome(profile, bookmark_types, output_format):
    """Import bookmarks and search keywords from Chrome-type profiles.

    On Chrome, keywords and search engines are the same thing and handled in
    their own database table; bookmarks cannot have associated keywords. This
    is why the dictionary lookups here are much simpler.

    Args:
        profile: Path to the profile folder. Its 'Web Data' database is read
                 for search engines, its 'Bookmarks' JSON file otherwise.
        bookmark_types: Kinds of entries to import. If 'search' is included,
                        only search engines are printed; otherwise the
                        bookmarks are.
        output_format: One of 'bookmark', 'quickmark', 'search' and
                       'oldsearch'.
    """
    out_template = {
        'bookmark': '{url} {name}',
        'quickmark': '{name} {url}',
        'search': "c.url.searchengines['{keyword}'] = '{url}'",
        'oldsearch': '{keyword} {url}'
    }

    if 'search' in bookmark_types:
        webdata = sqlite3.connect(os.path.join(profile, 'Web Data'))
        try:
            c = webdata.cursor()
            c.execute('SELECT keyword,url FROM keywords;')
            for keyword, url in c:
                try:
                    url = opensearch_convert(url)
                    print(out_template[output_format].format(
                        keyword=keyword, url=url))
                except KeyError:
                    print('# Unsupported parameter in url for {}; skipping....'.
                          format(keyword))
        finally:
            # Do not leave the database handle open once we are done.
            webdata.close()

    else:
        with open(os.path.join(profile, 'Bookmarks'), encoding='utf-8') as f:
            bookmarks = json.load(f)

        def bm_tree_walk(bm, template):
            """Recursive function to walk through bookmarks."""
            if not isinstance(bm, dict):
                # The roots also hold non-bookmark values; ignore those.
                return
            assert 'type' in bm, bm
            if bm['type'] == 'url':
                # Skip browser-internal chrome:// pages.
                if urllib.parse.urlparse(bm['url']).scheme != 'chrome':
                    print(template.format(**bm))
            elif bm['type'] == 'folder':
                for child in bm['children']:
                    bm_tree_walk(child, template)

        for root in bookmarks['roots'].values():
            bm_tree_walk(root, out_template[output_format])
+
+
# Only run the importer when executed as a script, not when imported.
if __name__ == '__main__':
    main()