First Commit
This commit is contained in:
parent
d3c011306d
commit
773a5e5e8a
5 changed files with 153 additions and 125 deletions
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
|
@ -1,5 +1,5 @@
|
||||||
plugins {
|
plugins {
|
||||||
id("org.gradle.toolchains.foojay-resolver-convention") version "0.8.0"
|
id("org.gradle.toolchains.foojay-resolver-convention") version "0.8.0"
|
||||||
}
|
}
|
||||||
rootProject.name = "sitemap_utils"
|
rootProject.name = "sitemap"
|
||||||
|
|
||||||
|
|
|
@ -1,128 +1,14 @@
|
||||||
package org.thundernetwork
|
package org.thundernetwork.sitemap
|
||||||
|
|
||||||
import java.io.File
|
import org.thundernetwork.sitemap.models.UrlEntry
|
||||||
import java.time.LocalDateTime
|
|
||||||
import java.time.format.DateTimeFormatter
|
|
||||||
import javax.xml.parsers.DocumentBuilderFactory
|
|
||||||
import javax.xml.transform.OutputKeys
|
|
||||||
import javax.xml.transform.TransformerFactory
|
|
||||||
import javax.xml.transform.dom.DOMSource
|
|
||||||
import javax.xml.transform.stream.StreamResult
|
|
||||||
|
|
||||||
class SitemapGenerator(private val directoryPath: String) {
|
fun main() {
|
||||||
private val docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder()
|
val urlEntries = listOf(
|
||||||
private val transformer = TransformerFactory.newInstance().newTransformer().apply {
|
UrlEntry("https://www.example.com/", "2024-07-01", "monthly", 1.0),
|
||||||
setOutputProperty(OutputKeys.INDENT, "yes")
|
UrlEntry("https://www.example.com/about", "2024-07-01", "monthly", 0.8),
|
||||||
setOutputProperty("{https://xml.apache.org/xslt}indent-amount", "2")
|
UrlEntry("https://www.example.com/contact", "2024-07-01", "monthly", 0.8)
|
||||||
}
|
)
|
||||||
private val maxUrlsPerSitemap = 50000
|
|
||||||
private var sitemapCount = 0
|
|
||||||
private val sitemapIndexFile = File("$directoryPath/sitemap_index.xml")
|
|
||||||
|
|
||||||
init {
|
val generator = SitemapGenerator(urlEntries)
|
||||||
if (!sitemapIndexFile.exists()) {
|
generator.generateSitemap("sitemap.xml")
|
||||||
val doc = docBuilder.newDocument()
|
|
||||||
val rootElement = doc.createElement("sitemapindex")
|
|
||||||
rootElement.setAttribute("xmlns", "https://www.sitemaps.org/schemas/sitemap/0.9")
|
|
||||||
doc.appendChild(rootElement)
|
|
||||||
saveDocument(doc, sitemapIndexFile)
|
|
||||||
} else {
|
|
||||||
val doc = docBuilder.parse(sitemapIndexFile)
|
|
||||||
sitemapCount = doc.getElementsByTagName("sitemap").length
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun getSitemapFile(index: Int): File {
|
|
||||||
return File("$directoryPath/sitemap_$index.xml")
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun getDocument(file: File): org.w3c.dom.Document {
|
|
||||||
return if (file.exists()) {
|
|
||||||
docBuilder.parse(file)
|
|
||||||
} else {
|
|
||||||
val doc = docBuilder.newDocument()
|
|
||||||
val rootElement = doc.createElement("urlset")
|
|
||||||
rootElement.setAttribute("xmlns", "https://www.sitemaps.org/schemas/sitemap/0.9")
|
|
||||||
doc.appendChild(rootElement)
|
|
||||||
doc
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fun addUrl(url: String, lastmod: String? = null, changefreq: String? = null, priority: String? = null) {
|
|
||||||
val currentSitemapFile = getSitemapFile(sitemapCount)
|
|
||||||
var doc = getDocument(currentSitemapFile)
|
|
||||||
var root = doc.documentElement
|
|
||||||
|
|
||||||
if (root.getElementsByTagName("url").length >= maxUrlsPerSitemap) {
|
|
||||||
sitemapCount++
|
|
||||||
doc = getDocument(getSitemapFile(sitemapCount))
|
|
||||||
root = doc.documentElement
|
|
||||||
updateSitemapIndex()
|
|
||||||
}
|
|
||||||
|
|
||||||
val urlElement = doc.createElement("url")
|
|
||||||
val locElement = doc.createElement("loc")
|
|
||||||
locElement.textContent = url
|
|
||||||
urlElement.appendChild(locElement)
|
|
||||||
|
|
||||||
val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssXXX")
|
|
||||||
val now = LocalDateTime.now().format(formatter)
|
|
||||||
val lastmodElement = doc.createElement("lastmod")
|
|
||||||
lastmodElement.textContent = lastmod ?: now
|
|
||||||
urlElement.appendChild(lastmodElement)
|
|
||||||
|
|
||||||
changefreq?.let {
|
|
||||||
val changefreqElement = doc.createElement("changefreq")
|
|
||||||
changefreqElement.textContent = it
|
|
||||||
urlElement.appendChild(changefreqElement)
|
|
||||||
}
|
|
||||||
|
|
||||||
priority?.let {
|
|
||||||
val priorityElement = doc.createElement("priority")
|
|
||||||
priorityElement.textContent = it
|
|
||||||
urlElement.appendChild(priorityElement)
|
|
||||||
}
|
|
||||||
|
|
||||||
root.appendChild(urlElement)
|
|
||||||
saveDocument(doc, currentSitemapFile)
|
|
||||||
}
|
|
||||||
|
|
||||||
fun removeUrl(url: String) {
|
|
||||||
for (i in 0..sitemapCount) {
|
|
||||||
val sitemapFile = getSitemapFile(i)
|
|
||||||
if (sitemapFile.exists()) {
|
|
||||||
val doc = getDocument(sitemapFile)
|
|
||||||
val root = doc.documentElement
|
|
||||||
val urlNodes = root.getElementsByTagName("url")
|
|
||||||
for (j in 0 until urlNodes.length) {
|
|
||||||
val urlNode = urlNodes.item(j)
|
|
||||||
val locNode = urlNode.childNodes.item(1)
|
|
||||||
if (locNode.textContent == url) {
|
|
||||||
root.removeChild(urlNode)
|
|
||||||
saveDocument(doc, sitemapFile)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun updateSitemapIndex() {
|
|
||||||
val doc = docBuilder.parse(sitemapIndexFile)
|
|
||||||
val root = doc.documentElement
|
|
||||||
|
|
||||||
val sitemapElement = doc.createElement("sitemap")
|
|
||||||
val locElement = doc.createElement("loc")
|
|
||||||
locElement.textContent = "sitemap_$sitemapCount.xml"
|
|
||||||
sitemapElement.appendChild(locElement)
|
|
||||||
|
|
||||||
root.appendChild(sitemapElement)
|
|
||||||
saveDocument(doc, sitemapIndexFile)
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun saveDocument(doc: org.w3c.dom.Document, file: File) {
|
|
||||||
val source = DOMSource(doc)
|
|
||||||
val result = StreamResult(file)
|
|
||||||
transformer.transform(source, result)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
128
src/main/kotlin/SitemapGenerator.kt
Normal file
128
src/main/kotlin/SitemapGenerator.kt
Normal file
|
@ -0,0 +1,128 @@
|
||||||
|
package org.thundernetwork.sitemap
|
||||||
|
|
||||||
|
import org.thundernetwork.sitemap.models.UrlEntry
|
||||||
|
import org.w3c.dom.Document
|
||||||
|
import org.w3c.dom.Element
|
||||||
|
import java.io.File
|
||||||
|
import javax.xml.parsers.DocumentBuilderFactory
|
||||||
|
import javax.xml.transform.OutputKeys
|
||||||
|
import javax.xml.transform.TransformerFactory
|
||||||
|
import javax.xml.transform.dom.DOMSource
|
||||||
|
import javax.xml.transform.stream.StreamResult
|
||||||
|
|
||||||
|
/**
 * Collects [UrlEntry] items in memory and writes them out as sitemap XML.
 *
 * @property large when `true`, [generateSitemap] splits the output into several sitemap
 *   files plus an index once more than [MAX_URLS_PER_SITEMAP] entries are held.
 * @property path output location. For single-file output this is the sitemap file itself;
 *   for split output it is the directory that receives the `sitemap-N.xml` files, and the
 *   index is written next to it as `"$path.xml"`.
 */
class SitemapGenerator(private val large: Boolean, private val path: String) {

    private val urlEntries = mutableListOf<UrlEntry>()

    /**
     * Parses the sitemap file at [filePath] and appends every `<url>` element found in it
     * to the entries held by this generator.
     *
     * A `<url>` element without a `<loc>` child is skipped, because a [UrlEntry] cannot be
     * built without a location.
     *
     * @param filePath path of an existing sitemap XML file.
     * @throws NumberFormatException if a `<priority>` value is not a valid number.
     */
    fun loadSitemap(filePath: String) {
        val document: Document = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder()
            .parse(File(filePath))
        val urlNodes = document.getElementsByTagName("url")

        for (i in 0 until urlNodes.length) {
            val urlElement = urlNodes.item(i) as Element
            // item(0) is null when the tag is absent; skip instead of hitting an NPE.
            val loc = urlElement.firstChildText("loc") ?: continue

            urlEntries.add(
                UrlEntry(
                    loc,
                    urlElement.firstChildText("lastmod"),
                    urlElement.firstChildText("changefreq"),
                    urlElement.firstChildText("priority")?.toDouble()
                )
            )
        }
    }

    /** Appends [urlEntry] to the entries that will be written by [generateSitemap]. */
    fun addUrl(urlEntry: UrlEntry) {
        urlEntries.add(urlEntry)
    }

    /** Removes every held entry whose location equals [loc]. */
    fun removeUrl(loc: String) {
        urlEntries.removeAll { it.loc == loc }
    }

    /**
     * Writes the held entries to disk.
     *
     * Split output (several sitemap files plus an index) is produced only when [large] is
     * set and more than [MAX_URLS_PER_SITEMAP] entries are held; otherwise a single sitemap
     * is written to [path].
     */
    fun generateSitemap() {
        if (large && urlEntries.size > MAX_URLS_PER_SITEMAP) {
            generateLargeSitemap()
        } else {
            generateSingleSitemap(path)
        }
    }

    /**
     * Writes [entries] as one `<urlset>` document to [filePath].
     *
     * @param filePath destination file.
     * @param entries entries to serialise; defaults to everything held by this generator.
     */
    private fun generateSingleSitemap(filePath: String, entries: List<UrlEntry> = urlEntries) {
        val doc: Document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()

        val urlset: Element = doc.createElement("urlset")
        urlset.setAttribute("xmlns", SITEMAP_NAMESPACE)
        doc.appendChild(urlset)

        for (entry in entries) {
            val url: Element = doc.createElement("url")
            url.appendTextChild("loc", entry.loc)
            // Optional fields are emitted only when present.
            entry.lastmod?.let { url.appendTextChild("lastmod", it) }
            entry.changefreq?.let { url.appendTextChild("changefreq", it) }
            entry.priority?.let { url.appendTextChild("priority", it.toString()) }
            urlset.appendChild(url)
        }

        writeDocument(doc, filePath)
    }

    /**
     * Writes the held entries as `sitemap-N.xml` files of at most [MAX_URLS_PER_SITEMAP]
     * entries each inside the directory [path] (created if missing), then writes a
     * `<sitemapindex>` document listing them to `"$path.xml"`.
     */
    private fun generateLargeSitemap() {
        val dir = File(path)
        if (!dir.exists()) {
            dir.mkdirs()
        }

        val indexDoc: Document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()
        val sitemapIndex: Element = indexDoc.createElement("sitemapindex")
        sitemapIndex.setAttribute("xmlns", SITEMAP_NAMESPACE)
        indexDoc.appendChild(sitemapIndex)

        urlEntries.chunked(MAX_URLS_PER_SITEMAP).forEachIndexed { index, chunk ->
            val sitemapFilePath = "$path/sitemap-$index.xml"
            // Write only this chunk; writing the full list here would duplicate every URL
            // into every sitemap file.
            generateSingleSitemap(sitemapFilePath, chunk)

            // NOTE(review): the sitemap protocol expects an absolute URL in <loc>; this
            // records the local file path because no base URL is available here.
            val sitemap: Element = indexDoc.createElement("sitemap")
            sitemap.appendTextChild("loc", sitemapFilePath)
            sitemapIndex.appendChild(sitemap)
        }

        writeDocument(indexDoc, "$path.xml")
    }

    /** Serialises [doc] with indentation enabled to the file at [filePath]. */
    private fun writeDocument(doc: Document, filePath: String) {
        val transformer = TransformerFactory.newInstance().newTransformer()
        transformer.setOutputProperty(OutputKeys.INDENT, "yes")
        transformer.transform(DOMSource(doc), StreamResult(File(filePath)))
    }

    /** Returns the text of the first descendant element named [tag], or `null` if there is none. */
    private fun Element.firstChildText(tag: String): String? =
        getElementsByTagName(tag).item(0)?.textContent

    /** Appends a new child element named [tag] whose only content is [text]. */
    private fun Element.appendTextChild(tag: String, text: String) {
        val child: Element = ownerDocument.createElement(tag)
        child.appendChild(ownerDocument.createTextNode(text))
        appendChild(child)
    }

    private companion object {
        /** Entry count above which split output is used, and the size of each split file. */
        const val MAX_URLS_PER_SITEMAP = 10000

        /** Namespace URI set on the root element of generated documents. */
        const val SITEMAP_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9"
    }
}
|
8
src/main/kotlin/models/UrlEntry.kt
Normal file
8
src/main/kotlin/models/UrlEntry.kt
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
package org.thundernetwork.sitemap.models
|
||||||
|
|
||||||
|
/**
 * One URL record of a sitemap.
 *
 * Only [loc] is mandatory; every other field defaults to `null`.
 *
 * @property loc location of the page.
 * @property lastmod last-modification value, kept as the raw string.
 * @property changefreq change-frequency value, kept as the raw string.
 * @property priority priority value.
 */
data class UrlEntry(
    val loc: String,
    val lastmod: String? = null,
    val changefreq: String? = null,
    val priority: Double? = null
)
|
Loading…
Reference in a new issue