diff options
Diffstat (limited to 'bin/importer.py')
-rw-r--r-- | bin/importer.py | 351 |
1 files changed, 351 insertions, 0 deletions
diff --git a/bin/importer.py b/bin/importer.py new file mode 100644 index 0000000..592b84d --- /dev/null +++ b/bin/importer.py @@ -0,0 +1,351 @@ +#!/usr/bin/env python3 +# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: + +# Copyright 2014-2019 Florian Bruhin (The Compiler) <mail@qutebrowser.org> +# Copyright 2014-2018 Claude (longneck) <longneck@scratchbook.ch> + +# This file is part of qutebrowser. +# +# qutebrowser is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# qutebrowser is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>. + + +"""Tool to import data from other browsers. + +Currently importing bookmarks from Netscape Bookmark files and Mozilla +profiles is supported. +""" + + +import argparse +import sqlite3 +import os +import urllib.parse +import json +import string + +browser_default_input_format = { + 'chromium': 'chrome', + 'chrome': 'chrome', + 'ie': 'netscape', + 'firefox': 'mozilla', + 'seamonkey': 'mozilla', + 'palemoon': 'mozilla', +} + + +def main(): + args = get_args() + bookmark_types = [] + output_format = None + input_format = args.input_format + if args.search_output: + bookmark_types = ['search'] + if args.oldconfig: + output_format = 'oldsearch' + else: + output_format = 'search' + else: + if args.bookmark_output: + output_format = 'bookmark' + elif args.quickmark_output: + output_format = 'quickmark' + if args.import_bookmarks: + bookmark_types.append('bookmark') + if args.import_keywords: + bookmark_types.append('keyword') + if not bookmark_types: + bookmark_types = ['bookmark', 'keyword'] + if not output_format: + output_format = 'quickmark' + if not input_format: + if args.browser: + input_format = browser_default_input_format[args.browser] + else: + #default to netscape + input_format = 'netscape' + + import_function = { + 'netscape': import_netscape_bookmarks, + 'mozilla': import_moz_places, + 'chrome': import_chrome, + } + import_function[input_format](args.bookmarks, bookmark_types, + output_format) + + +def get_args(): + """Get the argparse parser.""" + parser = argparse.ArgumentParser( + epilog="To import bookmarks from Chromium, Firefox or IE, " + "export them to HTML in your browsers bookmark manager. ") + parser.add_argument( + 'browser', + help="Which browser? {%(choices)s}", + choices=browser_default_input_format.keys(), + nargs='?', + metavar='browser') + parser.add_argument( + '-i', + '--input-format', + help='Which input format? (overrides browser default; "netscape" if ' + 'neither given)', + choices=set(browser_default_input_format.values()), + required=False) + parser.add_argument( + '-b', + '--bookmark-output', + help="Output in bookmark format.", + action='store_true', + default=False, + required=False) + parser.add_argument( + '-q', + '--quickmark-output', + help="Output in quickmark format (default).", + action='store_true', + default=False, + required=False) + parser.add_argument( + '-s', + '--search-output', + help="Output config.py search engine format (negates -B and -K)", + action='store_true', + default=False, + required=False) + parser.add_argument( + '--oldconfig', + help="Output search engine format for old qutebrowser.conf format", + default=False, + action='store_true', + required=False) + parser.add_argument( + '-B', + '--import-bookmarks', + help="Import plain bookmarks (can be combiend with -K)", + action='store_true', + default=False, + required=False) + parser.add_argument( + '-K', + '--import-keywords', + help="Import keywords (can be combined with -B)", + action='store_true', + default=False, + required=False) + parser.add_argument( + 'bookmarks', + help="Bookmarks file (html format) or " + "profile folder (Mozilla format)") + args = parser.parse_args() + return args + + +def search_escape(url): + """Escape URLs such that preexisting { and } are handled properly. + + Will obviously trash a properly-formatted qutebrowser URL. + """ + return url.replace('{', '{{').replace('}', '}}') + + +def opensearch_convert(url): + """Convert a basic OpenSearch URL into something qutebrowser can use. + + Exceptions: + KeyError: + An unknown and required parameter is present in the URL. This + usually means there's browser/addon specific functionality needed + to build the URL (I'm looking at you and your browser, Google) that + obviously won't be present here. + """ + subst = { + 'searchTerms': '%s', # for proper escaping later + 'language': '*', + 'inputEncoding': 'UTF-8', + 'outputEncoding': 'UTF-8' + } + + # remove optional parameters (even those we don't support) + for param in string.Formatter().parse(url): + if param[1]: + if param[1].endswith('?'): + url = url.replace('{' + param[1] + '}', '') + elif param[2] and param[2].endswith('?'): + url = url.replace('{' + param[1] + ':' + param[2] + '}', '') + return search_escape(url.format(**subst)).replace('%s', '{}') + + +def import_netscape_bookmarks(bookmarks_file, bookmark_types, output_format): + """Import bookmarks from a NETSCAPE-Bookmark-file v1. + + Generated by Chromium, Firefox, IE and possibly more browsers. Not all + export all possible bookmark types: + - Firefox mostly works with everything + - Chrome doesn't support keywords at all; searches are a separate + database + """ + import bs4 + with open(bookmarks_file, encoding='utf-8') as f: + soup = bs4.BeautifulSoup(f, 'html.parser') + bookmark_query = { + 'search': lambda tag: ( + (tag.name == 'a') and + ('shortcuturl' in tag.attrs) and + ('%s' in tag['href'])), + 'keyword': lambda tag: ( + (tag.name == 'a') and + ('shortcuturl' in tag.attrs) and + ('%s' not in tag['href'])), + 'bookmark': lambda tag: ( + (tag.name == 'a') and + ('shortcuturl' not in tag.attrs) and + (tag.string)), + } + output_template = { + 'search': { + 'search': + "c.url.searchengines['{tag[shortcuturl]}'] = " + "'{tag[href]}' #{tag.string}" + }, + 'oldsearch': { + 'search': '{tag[shortcuturl]} = {tag[href]} #{tag.string}', + }, + 'bookmark': { + 'bookmark': '{tag[href]} {tag.string}', + 'keyword': '{tag[href]} {tag.string}' + }, + 'quickmark': { + 'bookmark': '{tag.string} {tag[href]}', + 'keyword': '{tag[shortcuturl]} {tag[href]}' + } + } + bookmarks = [] + for typ in bookmark_types: + tags = soup.findAll(bookmark_query[typ]) + for tag in tags: + if typ == 'search': + tag['href'] = search_escape(tag['href']).replace('%s', '{}') + if tag['href'] not in bookmarks: + bookmarks.append( + output_template[output_format][typ].format(tag=tag)) + for bookmark in bookmarks: + print(bookmark) + + +def import_moz_places(profile, bookmark_types, output_format): + """Import bookmarks from a Mozilla profile's places.sqlite database.""" + place_query = { + 'bookmark': ( + "SELECT DISTINCT moz_bookmarks.title,moz_places.url " + "FROM moz_bookmarks,moz_places " + "WHERE moz_places.id=moz_bookmarks.fk " + "AND moz_places.id NOT IN (SELECT place_id FROM moz_keywords) " + "AND moz_places.url NOT LIKE 'place:%';" + ), # Bookmarks with no keywords assigned + 'keyword': ( + "SELECT moz_keywords.keyword,moz_places.url " + "FROM moz_keywords,moz_places,moz_bookmarks " + "WHERE moz_places.id=moz_bookmarks.fk " + "AND moz_places.id=moz_keywords.place_id " + "AND moz_places.url NOT LIKE '%!%s%' ESCAPE '!';" + ), # Bookmarks with keywords assigned but no %s substitution + 'search': ( + "SELECT moz_keywords.keyword, " + " moz_bookmarks.title, " + " search_conv(moz_places.url) AS url " + "FROM moz_keywords,moz_places,moz_bookmarks " + "WHERE moz_places.id=moz_bookmarks.fk " + "AND moz_places.id=moz_keywords.place_id " + "AND moz_places.url LIKE '%!%s%' ESCAPE '!';" + ) # bookmarks with keyword and %s substitution + } + out_template = { + 'bookmark': { + 'bookmark': '{url} {title}', + 'keyword': '{url} {keyword}' + }, + 'quickmark': { + 'bookmark': '{title} {url}', + 'keyword': '{keyword} {url}' + }, + 'oldsearch': { + 'search': '{keyword} {url} #{title}' + }, + 'search': { + 'search': "c.url.searchengines['{keyword}'] = '{url}' #{title}" + } + } + + def search_conv(url): + return search_escape(url).replace('%s', '{}') + + places = sqlite3.connect(os.path.join(profile, "places.sqlite")) + places.create_function('search_conv', 1, search_conv) + places.row_factory = sqlite3.Row + c = places.cursor() + for typ in bookmark_types: + c.execute(place_query[typ]) + for row in c: + print(out_template[output_format][typ].format(**row)) + + +def import_chrome(profile, bookmark_types, output_format): + """Import bookmarks and search keywords from Chrome-type profiles. + + On Chrome, keywords and search engines are the same thing and handled in + their own database table; bookmarks cannot have associated keywords. This + is why the dictionary lookups here are much simpler. + """ + out_template = { + 'bookmark': '{url} {name}', + 'quickmark': '{name} {url}', + 'search': "c.url.searchengines['{keyword}'] = '{url}'", + 'oldsearch': '{keyword} {url}' + } + + if 'search' in bookmark_types: + webdata = sqlite3.connect(os.path.join(profile, 'Web Data')) + c = webdata.cursor() + c.execute('SELECT keyword,url FROM keywords;') + for keyword, url in c: + try: + url = opensearch_convert(url) + print(out_template[output_format].format( + keyword=keyword, url=url)) + except KeyError: + print('# Unsupported parameter in url for {}; skipping....'. + format(keyword)) + + else: + with open(os.path.join(profile, 'Bookmarks'), encoding='utf-8') as f: + bookmarks = json.load(f) + + def bm_tree_walk(bm, template): + """Recursive function to walk through bookmarks.""" + if not isinstance(bm, dict): + return + assert 'type' in bm, bm + if bm['type'] == 'url': + if urllib.parse.urlparse(bm['url']).scheme != 'chrome': + print(template.format(**bm)) + elif bm['type'] == 'folder': + for child in bm['children']: + bm_tree_walk(child, template) + + for root in bookmarks['roots'].values(): + bm_tree_walk(root, out_template[output_format]) + + +if __name__ == '__main__': + main() |