First Commit
This commit is contained in:
parent
d3c011306d
commit
773a5e5e8a
5 changed files with 153 additions and 125 deletions
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
|
@ -1,5 +1,5 @@
|
|||
plugins {
|
||||
id("org.gradle.toolchains.foojay-resolver-convention") version "0.8.0"
|
||||
}
|
||||
rootProject.name = "sitemap_utils"
|
||||
rootProject.name = "sitemap"
|
||||
|
||||
|
|
|
@ -1,128 +1,14 @@
|
|||
package org.thundernetwork
|
||||
package org.thundernetwork.sitemap
|
||||
|
||||
import java.io.File
|
||||
import java.time.LocalDateTime
|
||||
import java.time.format.DateTimeFormatter
|
||||
import javax.xml.parsers.DocumentBuilderFactory
|
||||
import javax.xml.transform.OutputKeys
|
||||
import javax.xml.transform.TransformerFactory
|
||||
import javax.xml.transform.dom.DOMSource
|
||||
import javax.xml.transform.stream.StreamResult
|
||||
import org.thundernetwork.sitemap.models.UrlEntry
|
||||
|
||||
class SitemapGenerator(private val directoryPath: String) {
|
||||
private val docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder()
|
||||
private val transformer = TransformerFactory.newInstance().newTransformer().apply {
|
||||
setOutputProperty(OutputKeys.INDENT, "yes")
|
||||
setOutputProperty("{https://xml.apache.org/xslt}indent-amount", "2")
|
||||
}
|
||||
private val maxUrlsPerSitemap = 50000
|
||||
private var sitemapCount = 0
|
||||
private val sitemapIndexFile = File("$directoryPath/sitemap_index.xml")
|
||||
fun main() {
|
||||
val urlEntries = listOf(
|
||||
UrlEntry("https://www.example.com/", "2024-07-01", "monthly", 1.0),
|
||||
UrlEntry("https://www.example.com/about", "2024-07-01", "monthly", 0.8),
|
||||
UrlEntry("https://www.example.com/contact", "2024-07-01", "monthly", 0.8)
|
||||
)
|
||||
|
||||
init {
|
||||
if (!sitemapIndexFile.exists()) {
|
||||
val doc = docBuilder.newDocument()
|
||||
val rootElement = doc.createElement("sitemapindex")
|
||||
rootElement.setAttribute("xmlns", "https://www.sitemaps.org/schemas/sitemap/0.9")
|
||||
doc.appendChild(rootElement)
|
||||
saveDocument(doc, sitemapIndexFile)
|
||||
} else {
|
||||
val doc = docBuilder.parse(sitemapIndexFile)
|
||||
sitemapCount = doc.getElementsByTagName("sitemap").length
|
||||
}
|
||||
}
|
||||
|
||||
private fun getSitemapFile(index: Int): File {
|
||||
return File("$directoryPath/sitemap_$index.xml")
|
||||
}
|
||||
|
||||
private fun getDocument(file: File): org.w3c.dom.Document {
|
||||
return if (file.exists()) {
|
||||
docBuilder.parse(file)
|
||||
} else {
|
||||
val doc = docBuilder.newDocument()
|
||||
val rootElement = doc.createElement("urlset")
|
||||
rootElement.setAttribute("xmlns", "https://www.sitemaps.org/schemas/sitemap/0.9")
|
||||
doc.appendChild(rootElement)
|
||||
doc
|
||||
}
|
||||
}
|
||||
|
||||
fun addUrl(url: String, lastmod: String? = null, changefreq: String? = null, priority: String? = null) {
|
||||
val currentSitemapFile = getSitemapFile(sitemapCount)
|
||||
var doc = getDocument(currentSitemapFile)
|
||||
var root = doc.documentElement
|
||||
|
||||
if (root.getElementsByTagName("url").length >= maxUrlsPerSitemap) {
|
||||
sitemapCount++
|
||||
doc = getDocument(getSitemapFile(sitemapCount))
|
||||
root = doc.documentElement
|
||||
updateSitemapIndex()
|
||||
}
|
||||
|
||||
val urlElement = doc.createElement("url")
|
||||
val locElement = doc.createElement("loc")
|
||||
locElement.textContent = url
|
||||
urlElement.appendChild(locElement)
|
||||
|
||||
val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssXXX")
|
||||
val now = LocalDateTime.now().format(formatter)
|
||||
val lastmodElement = doc.createElement("lastmod")
|
||||
lastmodElement.textContent = lastmod ?: now
|
||||
urlElement.appendChild(lastmodElement)
|
||||
|
||||
changefreq?.let {
|
||||
val changefreqElement = doc.createElement("changefreq")
|
||||
changefreqElement.textContent = it
|
||||
urlElement.appendChild(changefreqElement)
|
||||
}
|
||||
|
||||
priority?.let {
|
||||
val priorityElement = doc.createElement("priority")
|
||||
priorityElement.textContent = it
|
||||
urlElement.appendChild(priorityElement)
|
||||
}
|
||||
|
||||
root.appendChild(urlElement)
|
||||
saveDocument(doc, currentSitemapFile)
|
||||
}
|
||||
|
||||
fun removeUrl(url: String) {
|
||||
for (i in 0..sitemapCount) {
|
||||
val sitemapFile = getSitemapFile(i)
|
||||
if (sitemapFile.exists()) {
|
||||
val doc = getDocument(sitemapFile)
|
||||
val root = doc.documentElement
|
||||
val urlNodes = root.getElementsByTagName("url")
|
||||
for (j in 0 until urlNodes.length) {
|
||||
val urlNode = urlNodes.item(j)
|
||||
val locNode = urlNode.childNodes.item(1)
|
||||
if (locNode.textContent == url) {
|
||||
root.removeChild(urlNode)
|
||||
saveDocument(doc, sitemapFile)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private fun updateSitemapIndex() {
|
||||
val doc = docBuilder.parse(sitemapIndexFile)
|
||||
val root = doc.documentElement
|
||||
|
||||
val sitemapElement = doc.createElement("sitemap")
|
||||
val locElement = doc.createElement("loc")
|
||||
locElement.textContent = "sitemap_$sitemapCount.xml"
|
||||
sitemapElement.appendChild(locElement)
|
||||
|
||||
root.appendChild(sitemapElement)
|
||||
saveDocument(doc, sitemapIndexFile)
|
||||
}
|
||||
|
||||
private fun saveDocument(doc: org.w3c.dom.Document, file: File) {
|
||||
val source = DOMSource(doc)
|
||||
val result = StreamResult(file)
|
||||
transformer.transform(source, result)
|
||||
}
|
||||
val generator = SitemapGenerator(urlEntries)
|
||||
generator.generateSitemap("sitemap.xml")
|
||||
}
|
||||
|
|
128
src/main/kotlin/SitemapGenerator.kt
Normal file
128
src/main/kotlin/SitemapGenerator.kt
Normal file
|
@ -0,0 +1,128 @@
|
|||
package org.thundernetwork.sitemap
|
||||
|
||||
import org.thundernetwork.sitemap.models.UrlEntry
|
||||
import org.w3c.dom.Document
|
||||
import org.w3c.dom.Element
|
||||
import java.io.File
|
||||
import javax.xml.parsers.DocumentBuilderFactory
|
||||
import javax.xml.transform.OutputKeys
|
||||
import javax.xml.transform.TransformerFactory
|
||||
import javax.xml.transform.dom.DOMSource
|
||||
import javax.xml.transform.stream.StreamResult
|
||||
|
||||
/**
 * Collects [UrlEntry] items in memory and writes them out as sitemap XML.
 *
 * Entries are added with [addUrl], imported from an existing file with [loadSitemap],
 * dropped with [removeUrl], and finally written to disk by [generateSitemap].
 *
 * @property large when `true` and more than [MAX_URLS_PER_SITEMAP] entries are held,
 *   [generateSitemap] splits the output into several files plus an index file.
 * @property path in single-file mode, the file the sitemap is written to; in split mode,
 *   the directory that receives the `sitemap-N.xml` files (the index is written to `"$path.xml"`).
 */
class SitemapGenerator(private val large: Boolean, private val path: String) {

    private val urlEntries = mutableListOf<UrlEntry>()

    /**
     * Parses the XML file at [filePath] and appends one [UrlEntry] per `<url>` element found.
     *
     * `<lastmod>`, `<changefreq>` and `<priority>` are optional and become `null` when absent.
     * A `<url>` element without a `<loc>` child fails with a [NullPointerException], and a
     * `<priority>` whose text is not a number fails with a [NumberFormatException].
     *
     * @param filePath path of the sitemap file to read.
     */
    fun loadSitemap(filePath: String) {
        val builder = DocumentBuilderFactory.newInstance().newDocumentBuilder()
        val doc: Document = builder.parse(File(filePath))
        val urlNodes = doc.getElementsByTagName("url")

        for (i in 0 until urlNodes.length) {
            val urlElement = urlNodes.item(i) as Element

            // <loc> is mandatory: item(0) is dereferenced without a null check on purpose.
            val loc = urlElement.getElementsByTagName("loc").item(0).textContent
            val lastmod = urlElement.firstChildText("lastmod")
            val changefreq = urlElement.firstChildText("changefreq")
            val priority = urlElement.firstChildText("priority")?.toDouble()

            urlEntries.add(UrlEntry(loc, lastmod, changefreq, priority))
        }
    }

    /** Appends [urlEntry] to the in-memory list; duplicates are not filtered. */
    fun addUrl(urlEntry: UrlEntry) {
        urlEntries.add(urlEntry)
    }

    /** Removes every held entry whose `loc` equals [loc]. */
    fun removeUrl(loc: String) {
        urlEntries.removeAll { it.loc == loc }
    }

    /**
     * Writes the held entries to disk.
     *
     * Produces a split sitemap (several files plus an index) only when [large] is set and
     * more than [MAX_URLS_PER_SITEMAP] entries are held; otherwise writes one file at [path].
     */
    fun generateSitemap() {
        if (large && urlEntries.size > MAX_URLS_PER_SITEMAP) {
            generateLargeSitemap()
        } else {
            writeUrlSet(urlEntries, path)
        }
    }

    /** Writes [entries] as a single `<urlset>` document to [filePath]. */
    private fun writeUrlSet(entries: List<UrlEntry>, filePath: String) {
        val doc: Document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()

        val urlset: Element = doc.createElement("urlset")
        urlset.setAttribute("xmlns", SITEMAP_NAMESPACE)
        doc.appendChild(urlset)

        for (entry in entries) {
            val url: Element = doc.createElement("url")
            url.appendTextElement("loc", entry.loc)
            // Optional fields are emitted only when present.
            entry.lastmod?.let { url.appendTextElement("lastmod", it) }
            entry.changefreq?.let { url.appendTextElement("changefreq", it) }
            entry.priority?.let { url.appendTextElement("priority", it.toString()) }
            urlset.appendChild(url)
        }

        writeDocument(doc, File(filePath))
    }

    /**
     * Splits the held entries into files of at most [MAX_URLS_PER_SITEMAP] URLs each,
     * named `sitemap-N.xml` inside the [path] directory, and writes an index of them
     * to `"$path.xml"`.
     */
    private fun generateLargeSitemap() {
        val dir = File(path)
        if (!dir.exists()) {
            dir.mkdirs()
        }

        val sitemapIndexDoc: Document =
            DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()
        val sitemapIndex: Element = sitemapIndexDoc.createElement("sitemapindex")
        sitemapIndex.setAttribute("xmlns", SITEMAP_NAMESPACE)
        sitemapIndexDoc.appendChild(sitemapIndex)

        urlEntries.chunked(MAX_URLS_PER_SITEMAP).forEachIndexed { index, chunk ->
            val sitemapFilePath = "$path/sitemap-$index.xml"
            // Write only this chunk; writing the full list here would duplicate
            // every URL into every part file.
            writeUrlSet(chunk, sitemapFilePath)

            val sitemap: Element = sitemapIndexDoc.createElement("sitemap")
            // NOTE(review): this stores the local file path in <loc>; the sitemap protocol
            // expects a URL here — confirm how consumers resolve it.
            sitemap.appendTextElement("loc", sitemapFilePath)
            sitemapIndex.appendChild(sitemap)
        }

        writeDocument(sitemapIndexDoc, File("$path.xml"))
    }

    /** Serializes [doc] to [file] with indentation enabled. */
    private fun writeDocument(doc: Document, file: File) {
        val transformer = TransformerFactory.newInstance().newTransformer()
        transformer.setOutputProperty(OutputKeys.INDENT, "yes")
        transformer.transform(DOMSource(doc), StreamResult(file))
    }

    /** Returns the text of the first descendant element named [tag], or `null` if there is none. */
    private fun Element.firstChildText(tag: String): String? =
        getElementsByTagName(tag).item(0)?.textContent

    /** Appends a child element named [tag] whose only content is [text]. */
    private fun Element.appendTextElement(tag: String, text: String) {
        val child = ownerDocument.createElement(tag)
        child.appendChild(ownerDocument.createTextNode(text))
        appendChild(child)
    }

    private companion object {
        /** Threshold for splitting, and the maximum number of URLs written per part file. */
        const val MAX_URLS_PER_SITEMAP = 10000

        /** Namespace written on the root element of every generated document. */
        const val SITEMAP_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9"
    }
}
|
8
src/main/kotlin/models/UrlEntry.kt
Normal file
8
src/main/kotlin/models/UrlEntry.kt
Normal file
|
@ -0,0 +1,8 @@
|
|||
package org.thundernetwork.sitemap.models
|
||||
|
||||
/**
 * One URL record of a sitemap.
 *
 * Only [loc] is required; the remaining fields default to `null`.
 * No validation is performed on any of the values.
 *
 * @property loc the URL of the page.
 * @property lastmod last-modification value, kept as free-form text.
 * @property changefreq change-frequency value, kept as free-form text.
 * @property priority priority value; presumably within 0.0..1.0 as in the sitemap
 *   protocol, but this is not enforced here.
 */
data class UrlEntry(
    val loc: String,
    val lastmod: String? = null,
    val changefreq: String? = null,
    val priority: Double? = null,
)
|
Loading…
Reference in a new issue