# app/utils/util.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118

import re
from django.apps import apps
from django.template.loader import render_to_string
from django.conf import settings
from bs4 import BeautifulSoup
import markdown


def markdown_to_html(txt):
    """Convert Markdown source text to an HTML string.

    A fresh ``markdown.Markdown`` instance is built on every call with the
    fenced-code, codehilite, attr_list, footnotes and "extra" extensions
    enabled. Highlighted code is wrapped in the ``highlight`` CSS class
    without line numbers.

    The former ``safe_mode=False`` argument is intentionally not passed:
    Python-Markdown deprecated it in 2.5 and dropped it in 3.0, and ``False``
    was its default, so the rendered output is the same without it.

    Args:
        txt: Markdown source text.

    Returns:
        The rendered HTML as returned by ``Markdown.convert``.
    """
    md = markdown.Markdown(
        extensions=[
            'markdown.extensions.fenced_code',
            'markdown.extensions.codehilite',
            'markdown.extensions.attr_list',
            'footnotes',
            'extra',
        ],
        extension_configs={
            'markdown.extensions.codehilite': {
                'css_class': 'highlight',
                'linenums': False,
            },
        },
        output_format='html5',
    )
    return md.convert(txt)


def extract_main_image(markdown):
    """Return the LuxImage referenced by the first ``<img>`` in the markup.

    The first ``<img>`` tag is expected to carry an id of the form
    ``image-<pk>``; the part after ``image-`` is used as the primary key of
    a ``photos.LuxImage`` lookup.

    Args:
        markdown: Markup to scan. Despite the name it is parsed as HTML with
            ``html.parser``. (The name shadows the imported ``markdown``
            module inside this function only; it is kept so keyword callers
            keep working.)

    Returns:
        The matching ``LuxImage`` instance, or ``None`` when there is no
        ``<img>`` tag, the first one has no ``id`` attribute, or its id does
        not contain ``image-``.

    Note:
        Exceptions raised by the model lookup itself are not caught here and
        propagate to the caller.
    """
    soup = BeautifulSoup(markdown, 'html.parser')
    first_img = soup.find('img')
    if first_img is None:
        return None

    # A plain inline image may have no id at all; previously that raised an
    # uncaught KeyError instead of meaning "no main image".
    id_parts = first_img.get('id', '').split('image-')
    if len(id_parts) < 2:
        return None

    img_pk = id_parts[1]
    return apps.get_model('photos', 'LuxImage').objects.get(pk=img_pk)


def _image_template_flags(classes):
    """Translate an image's CSS class list into template context flags.

    ``classes`` must be non-empty. The first entry is only inspected to tell
    cluster images apart; the remaining entries are positional modifiers:

    * cluster images: ``cluster <cluster_class> [caption|exif|<extra>] [exif]``
    * other images:   ``<css_class> [caption|exif] [exif]``

    Returns a dict with the keys ``caption``, ``exif``, ``is_cluster``,
    ``cluster_class`` and ``extra``.
    """
    flags = {
        'caption': False,
        'exif': False,
        'is_cluster': False,
        'cluster_class': None,
        'extra': None,
    }
    modifiers = classes[1:]

    if classes[0] == 'cluster':
        flags['is_cluster'] = True
        # Explicit length checks replace the old bare ``except: pass`` that
        # hid the IndexError raised by short class lists.
        if modifiers:
            flags['cluster_class'] = modifiers[0]
        if len(modifiers) > 1:
            if modifiers[1] == 'caption':
                flags['caption'] = True
            elif modifiers[1] == 'exif':
                flags['exif'] = True
            else:
                flags['extra'] = modifiers[1]
        if len(modifiers) > 2 and modifiers[2] == 'exif':
            flags['exif'] = True
    elif modifiers:
        flags['caption'] = modifiers[0] == 'caption'
        # "exif" may be the 2nd or the 3rd class (e.g. "picwide caption exif").
        # The old three-class branch was unreachable, so that form never
        # enabled exif.
        flags['exif'] = 'exif' in modifiers[:2]

    return flags


def parse_image(s):
    """Render the replacement markup for one matched ``<img>`` tag.

    Intended as the replacement callback for ``re.sub``: ``s`` is a regex
    match object whose full match text is parsed with BeautifulSoup/lxml.
    Only the first ``<img>`` found in the match is considered.

    Behavior by case:

    * first class is ``postpic`` or ``postpicright``: the tag is returned
      with ``[[base_url]]`` replaced by ``settings.IMAGES_URL``;
    * the tag has a class and an id of the form ``image-<pk>``: the
      ``photos.LuxImage`` with that pk is loaded and rendered through the
      ``lib/img_<first class>.html`` template, with the flags derived from
      the remaining classes;
    * the tag has no class, an empty class list, no id, or an id without
      ``image-``: it is a regular inline image and is returned unchanged
      (as re-serialized by BeautifulSoup).

    Args:
        s: A regex match object covering the ``<img>`` markup.

    Returns:
        The replacement HTML string. If no ``<img>`` can be parsed out of
        the match, the matched text is returned untouched.

    Note:
        Exceptions from the model lookup or from template rendering are not
        caught and propagate to the caller.
    """
    matched_text = s.group()
    img = BeautifulSoup(matched_text, "lxml").find('img')
    if img is None:
        # Nothing to rewrite; leave the source text as it was.
        return matched_text

    classes = img.get('class')
    if not classes:
        # regular inline image, not a luximage
        return str(img)

    if classes[0] in ('postpic', 'postpicright'):
        return str(img).replace('[[base_url]]', settings.IMAGES_URL)

    try:
        image_id = img['id'].split("image-")[1]
    except (KeyError, IndexError):
        # regular inline image, not a luximage
        return str(img)

    context = _image_template_flags(classes)
    context['image'] = apps.get_model('photos', 'LuxImage').objects.get(pk=image_id)
    return render_to_string("lib/img_%s.html" % classes[0], context)


def render_images(s):
    """Replace each self-closing ``<img ... />`` tag in ``s`` via ``parse_image``.

    Every tag is matched on its own: the pattern stops at the first ``>``
    after ``<img``. The previous greedy ``<img(.*)/>`` pattern ran from the
    first ``<img`` to the last ``/>`` on a line, so several images (or an
    image followed by any other self-closing tag) were collapsed into a
    single match and everything after the first image was lost.

    Args:
        s: HTML text to process.

    Returns:
        The text with every self-closing ``<img>`` tag replaced by whatever
        ``parse_image`` returns for it.
    """
    return re.sub(r'<img\b[^>]*/>', parse_image, s)


def parse_video(s):
    """Report whether the markup in ``s`` contains a ``<video>`` element.

    The text is parsed with BeautifulSoup using the lxml parser.

    Returns:
        ``True`` when a ``<video>`` tag is found, ``False`` otherwise.
    """
    document = BeautifulSoup(s, "lxml")
    return bool(document.find('video'))

def parse_reg_bio_page(
    url="https://www.theregister.co.uk/Author/Scott-Gilbertson/",
    timeout=10,
):
    """Fetch an author bio page and look up the image its first ``<img>`` names.

    The page body is downloaded and handed to ``extract_main_image``, which
    applies the same "first ``<img>`` with an ``image-<pk>`` id" lookup this
    function previously duplicated inline.

    The earlier implementation could not run: it called ``requests.get``
    although ``requests`` is not imported by this module, and it passed the
    response object, not its body, to BeautifulSoup. The download now uses
    the standard library.

    Args:
        url: Page to fetch; defaults to the bio page the function always
            used. Expected to be an http(s) URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Whatever ``extract_main_image`` returns for the downloaded markup.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.

    NOTE(review): looking up a local LuxImage from an id found on a
    third-party page looks like unfinished copy-paste; confirm what this
    function is supposed to extract.
    """
    # Imported here so this block does not depend on a module-level import.
    from urllib.request import urlopen

    with urlopen(url, timeout=timeout) as response:
        content = response.read()
    return extract_main_image(content)