forked from id774/automaticruby
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathdescription_link.rb
More file actions
74 lines (63 loc) · 1.94 KB
/
description_link.rb
File metadata and controls
74 lines (63 loc) · 1.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# -*- coding: utf-8 -*-
# Name:: Automatic::Plugin::Filter::DescriptionLink
# Author:: 774 <http://id774.net>
# Created:: Oct 03, 2014
# Updated:: Oct 16, 2014
# Copyright:: Copyright (c) 2014 Automatic Ruby Developers.
# License:: Licensed under the GNU GENERAL PUBLIC LICENSE, Version 3.0.
require 'uri'
require 'nkf'
require 'erb'
require 'open-uri'

module Automatic
  module Plugin
    # Filter that points each feed item's link at the last http/https URL
    # found in the item's description.
    #
    # Optional behaviour is switched on through a Hash config, where an
    # option counts as enabled only when its value is the Integer 1:
    #
    #   'clear_description' - replace the description with an empty string.
    #   'get_title'         - fetch the linked page and use the text of its
    #                         <title> element(s) as the item's title.
    #
    # Any config that is not a Hash leaves both options off.
    class FilterDescriptionLink
      # Matches every character that must be percent-encoded before a URL
      # is fetched. '%' is deliberately treated as safe so that escapes
      # already present in the URL ("%20") are not encoded a second time
      # ("%2520").
      UNSAFE_URL_CHARS = %r{[^a-zA-Z\d/:?=&~,.()#%]}

      # config   - plugin options (see the class comment).
      # pipeline - enumerable of feed objects responding to #items; entries
      #            may be nil.
      def initialize(config, pipeline = [])
        @config = config
        @pipeline = pipeline
      end

      # Rewrites every item of every feed in the pipeline.
      #
      # Returns an Array with one Automatic::FeedMaker.create_pipeline
      # result per pipeline entry. A nil entry yields a pipeline built from
      # an empty item list.
      def run
        @return_feeds = @pipeline.map do |feeds|
          items = feeds.nil? ? [] : feeds.items.map { |feed| rewrite_link(feed) }
          Automatic::FeedMaker.create_pipeline(items)
        end
      end

      private

      # True when +key+ is set to 1 in a Hash config.
      def option_enabled?(key)
        @config.is_a?(Hash) && @config[key] == 1
      end

      # Reads the body at +url+ and closes the handle afterwards.
      #
      # Kernel#open no longer opens URLs on Ruby 3.0 and later, so URI.open
      # is used when available; the Kernel#open branch only serves
      # Ruby < 2.5, where open-uri patches it instead.
      def read_url(url)
        if URI.respond_to?(:open)
          URI.open(url) { |io| io.read }
        else
          open(url) { |io| io.read }
        end
      end

      # Fetches the page at +url+ and returns the text of its <title>
      # element(s).
      #
      # NOTE: +url+ is percent-encoded IN PLACE, so the caller's string
      # (normally feed.link) ends up holding the encoded URL.
      #
      # Returns nil when +url+ is not a String, when the page yields no
      # title text, or when fetching/parsing fails (a warning is logged in
      # the failure case).
      def get_title(url)
        return nil unless url.is_a?(String)

        url.gsub!(UNSAFE_URL_CHARS) { |char| ERB::Util.url_encode(char) }
        begin
          html = NKF.nkf('--utf8', read_url(url))
          title = Nokogiri::HTML.parse(html, nil, 'utf8').xpath('//title').text
          # A blank title must not wipe out the title the item already has.
          title.strip.empty? ? nil : title
        rescue StandardError
          # Covers HTTP errors (OpenURI::HTTPError) as well as network and
          # parse failures: title lookup is best effort.
          Automatic::Log.puts('warn', "Failed in get title for: #{url}")
          nil
        end
      end

      # Applies the link rewrite and the configured options to one item.
      #
      # Returns the same +feed+ object, modified in place.
      def rewrite_link(feed)
        # to_s: an item without a description simply has no URL to extract.
        new_link = URI.extract(feed.description.to_s, %w[http https]).uniq.last
        feed.link = new_link unless new_link.nil?

        feed.description = '' if option_enabled?('clear_description')

        if option_enabled?('get_title')
          new_title = get_title(feed.link)
          feed.title = new_title unless new_title.nil?
        end

        feed
      end
    end
  end
end