-
Notifications
You must be signed in to change notification settings - Fork 18
/
sitemap.go
137 lines (120 loc) · 4.8 KB
/
sitemap.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
// Package sitemap provides primitives for highly efficient parsing of huge
// sitemap files.
package sitemap
import (
	"encoding/xml"
	"fmt"
	"io"
	"net/http"
	"os"
	"time"
)
// Frequency is a type alias of string that holds the change frequency of a page.
// The values declared by this package are the Always, Hourly, Daily, Weekly,
// Monthly, Yearly and Never constants.
type Frequency = string
// Change-frequency constants that describe how frequently a page changes.
const (
Always Frequency = "always" // The page is always changing.
Hourly Frequency = "hourly" // The page changes every hour.
Daily Frequency = "daily" // The page changes every day.
Weekly Frequency = "weekly" // The page changes every week.
Monthly Frequency = "monthly" // The page changes every month.
Yearly Frequency = "yearly" // The page changes every year.
Never Frequency = "never" // The page never changes.
)
// Entry is an interface that describes a single element (a URL) of a sitemap file.
// Keep in mind that it is implemented by a fully immutable entity, so you should
// minimize the number of calls because they can cause additional memory allocations.
//
// GetLocation returns the URL of the page.
// GetLocation must return a non-empty string value.
//
// GetLastModified parses and returns the date and time of the page's last modification.
// GetLastModified can return nil or a valid time.Time instance.
// Be careful: each call returns a new time.Time instance.
//
// GetChangeFrequency returns a string value that indicates how frequently the page changes.
// See the Frequency constants for the values declared by this package.
//
// GetPriority returns the priority of the page.
// The valid value is between 0.0 and 1.0; the default value is 0.5.
//
// You shouldn't implement this interface in your own types.
type Entry interface {
GetLocation() string
GetLastModified() *time.Time
GetChangeFrequency() Frequency
GetPriority() float32
}
// IndexEntry is an interface that describes a single element (a URL) of a sitemap index file.
// Keep in mind that it is implemented by a fully immutable entity, so you should
// minimize the number of calls because they can cause additional memory allocations.
//
// GetLocation returns the URL of a sitemap file.
// GetLocation must return a non-empty string value.
//
// GetLastModified parses and returns the date and time of the sitemap's last modification.
// GetLastModified can return nil or a valid time.Time instance.
// Be careful: each call returns a new time.Time instance.
//
// You shouldn't implement this interface in your own types.
type IndexEntry interface {
GetLocation() string
GetLastModified() *time.Time
}
// EntryConsumer is the type of a function that consumes parsed sitemap entries.
// It receives a single Entry and returns an error value.
type EntryConsumer func(Entry) error
// Parse parses the sitemap data supplied by reader and calls consumer for
// each sitemap entry.
//
// The work is delegated to parseLoop, with entryParser (bound to consumer)
// as the per-element handler; the error returned by parseLoop is returned
// unchanged.
func Parse(reader io.Reader, consumer EntryConsumer) error {
	// Adapt entryParser to the handler signature expected by parseLoop.
	handleElement := func(decoder *xml.Decoder, start *xml.StartElement) error {
		return entryParser(decoder, start, consumer)
	}

	return parseLoop(reader, handleElement)
}
// ParseFromFile opens the file at sitemapPath for reading, parses it as a
// sitemap and calls consumer for each sitemap entry.
//
// It returns the error from opening the file, or otherwise the result of
// Parse. The file is closed before ParseFromFile returns.
func ParseFromFile(sitemapPath string, consumer EntryConsumer) error {
	// os.Open is the read-only form of os.OpenFile. No permission bits are
	// needed because the file is never created here.
	sitemapFile, err := os.Open(sitemapPath)
	if err != nil {
		return err
	}
	defer sitemapFile.Close()

	return Parse(sitemapFile, consumer)
}
// ParseFromSite downloads a sitemap from url with an HTTP GET request,
// parses the response body and calls consumer for each sitemap entry.
//
// It returns the error from the HTTP request if the request fails, an error
// if the server answers with a status code outside the 2xx range, or
// otherwise the result of Parse. The response body is closed before
// ParseFromSite returns.
func ParseFromSite(url string, consumer EntryConsumer) error {
	res, err := http.Get(url)
	if err != nil {
		return err
	}
	defer res.Body.Close()

	// http.Get does not treat a non-2xx status as an error, so reject such
	// responses explicitly instead of feeding an error page to the parser.
	if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices {
		return fmt.Errorf("sitemap: fetching %q: unexpected HTTP status %s", url, res.Status)
	}

	return Parse(res.Body, consumer)
}
// IndexEntryConsumer is the type of a function that consumes parsed sitemap index entries.
// It receives a single IndexEntry and returns an error value.
type IndexEntryConsumer func(IndexEntry) error
// ParseIndex parses the sitemap index data supplied by reader and calls
// consumer for each sitemap index entry.
//
// The work is delegated to parseLoop, with indexEntryParser (bound to
// consumer) as the per-element handler; the error returned by parseLoop is
// returned unchanged.
func ParseIndex(reader io.Reader, consumer IndexEntryConsumer) error {
	// Adapt indexEntryParser to the handler signature expected by parseLoop.
	handleElement := func(decoder *xml.Decoder, start *xml.StartElement) error {
		return indexEntryParser(decoder, start, consumer)
	}

	return parseLoop(reader, handleElement)
}
// ParseIndexFromFile opens the file at sitemapPath for reading, parses it as
// a sitemap index and calls consumer for each sitemap index entry.
//
// It returns the error from opening the file, or otherwise the result of
// ParseIndex. The file is closed before ParseIndexFromFile returns.
func ParseIndexFromFile(sitemapPath string, consumer IndexEntryConsumer) error {
	// os.Open is the read-only form of os.OpenFile. No permission bits are
	// needed because the file is never created here.
	sitemapFile, err := os.Open(sitemapPath)
	if err != nil {
		return err
	}
	defer sitemapFile.Close()

	return ParseIndex(sitemapFile, consumer)
}
// ParseIndexFromSite downloads a sitemap index from sitemapURL with an HTTP
// GET request, parses the response body and calls consumer for each sitemap
// index entry.
//
// It returns the error from the HTTP request if the request fails, an error
// if the server answers with a status code outside the 2xx range, or
// otherwise the result of ParseIndex. The response body is closed before
// ParseIndexFromSite returns.
func ParseIndexFromSite(sitemapURL string, consumer IndexEntryConsumer) error {
	res, err := http.Get(sitemapURL)
	if err != nil {
		return err
	}
	defer res.Body.Close()

	// http.Get does not treat a non-2xx status as an error, so reject such
	// responses explicitly instead of feeding an error page to the parser.
	if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices {
		return fmt.Errorf("sitemap: fetching %q: unexpected HTTP status %s", sitemapURL, res.Status)
	}

	return ParseIndex(res.Body, consumer)
}