First Commit

This commit is contained in:
Christian Torbidone 2024-08-01 15:20:55 +02:00
parent d3c011306d
commit 773a5e5e8a
5 changed files with 153 additions and 125 deletions

6
.idea/vcs.xml Normal file
View file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

View file

@ -1,5 +1,5 @@
plugins { plugins {
id("org.gradle.toolchains.foojay-resolver-convention") version "0.8.0" id("org.gradle.toolchains.foojay-resolver-convention") version "0.8.0"
} }
rootProject.name = "sitemap_utils" rootProject.name = "sitemap"

View file

@ -1,128 +1,14 @@
package org.thundernetwork package org.thundernetwork.sitemap
import java.io.File import org.thundernetwork.sitemap.models.UrlEntry
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import javax.xml.parsers.DocumentBuilderFactory
import javax.xml.transform.OutputKeys
import javax.xml.transform.TransformerFactory
import javax.xml.transform.dom.DOMSource
import javax.xml.transform.stream.StreamResult
class SitemapGenerator(private val directoryPath: String) { fun main() {
private val docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder() val urlEntries = listOf(
private val transformer = TransformerFactory.newInstance().newTransformer().apply { UrlEntry("https://www.example.com/", "2024-07-01", "monthly", 1.0),
setOutputProperty(OutputKeys.INDENT, "yes") UrlEntry("https://www.example.com/about", "2024-07-01", "monthly", 0.8),
setOutputProperty("{https://xml.apache.org/xslt}indent-amount", "2") UrlEntry("https://www.example.com/contact", "2024-07-01", "monthly", 0.8)
} )
private val maxUrlsPerSitemap = 50000
private var sitemapCount = 0
private val sitemapIndexFile = File("$directoryPath/sitemap_index.xml")
init { val generator = SitemapGenerator(urlEntries)
if (!sitemapIndexFile.exists()) { generator.generateSitemap("sitemap.xml")
val doc = docBuilder.newDocument()
val rootElement = doc.createElement("sitemapindex")
rootElement.setAttribute("xmlns", "https://www.sitemaps.org/schemas/sitemap/0.9")
doc.appendChild(rootElement)
saveDocument(doc, sitemapIndexFile)
} else {
val doc = docBuilder.parse(sitemapIndexFile)
sitemapCount = doc.getElementsByTagName("sitemap").length
}
}
private fun getSitemapFile(index: Int): File {
return File("$directoryPath/sitemap_$index.xml")
}
private fun getDocument(file: File): org.w3c.dom.Document {
return if (file.exists()) {
docBuilder.parse(file)
} else {
val doc = docBuilder.newDocument()
val rootElement = doc.createElement("urlset")
rootElement.setAttribute("xmlns", "https://www.sitemaps.org/schemas/sitemap/0.9")
doc.appendChild(rootElement)
doc
}
}
fun addUrl(url: String, lastmod: String? = null, changefreq: String? = null, priority: String? = null) {
val currentSitemapFile = getSitemapFile(sitemapCount)
var doc = getDocument(currentSitemapFile)
var root = doc.documentElement
if (root.getElementsByTagName("url").length >= maxUrlsPerSitemap) {
sitemapCount++
doc = getDocument(getSitemapFile(sitemapCount))
root = doc.documentElement
updateSitemapIndex()
}
val urlElement = doc.createElement("url")
val locElement = doc.createElement("loc")
locElement.textContent = url
urlElement.appendChild(locElement)
val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssXXX")
val now = LocalDateTime.now().format(formatter)
val lastmodElement = doc.createElement("lastmod")
lastmodElement.textContent = lastmod ?: now
urlElement.appendChild(lastmodElement)
changefreq?.let {
val changefreqElement = doc.createElement("changefreq")
changefreqElement.textContent = it
urlElement.appendChild(changefreqElement)
}
priority?.let {
val priorityElement = doc.createElement("priority")
priorityElement.textContent = it
urlElement.appendChild(priorityElement)
}
root.appendChild(urlElement)
saveDocument(doc, currentSitemapFile)
}
fun removeUrl(url: String) {
for (i in 0..sitemapCount) {
val sitemapFile = getSitemapFile(i)
if (sitemapFile.exists()) {
val doc = getDocument(sitemapFile)
val root = doc.documentElement
val urlNodes = root.getElementsByTagName("url")
for (j in 0 until urlNodes.length) {
val urlNode = urlNodes.item(j)
val locNode = urlNode.childNodes.item(1)
if (locNode.textContent == url) {
root.removeChild(urlNode)
saveDocument(doc, sitemapFile)
return
}
}
}
}
}
private fun updateSitemapIndex() {
val doc = docBuilder.parse(sitemapIndexFile)
val root = doc.documentElement
val sitemapElement = doc.createElement("sitemap")
val locElement = doc.createElement("loc")
locElement.textContent = "sitemap_$sitemapCount.xml"
sitemapElement.appendChild(locElement)
root.appendChild(sitemapElement)
saveDocument(doc, sitemapIndexFile)
}
private fun saveDocument(doc: org.w3c.dom.Document, file: File) {
val source = DOMSource(doc)
val result = StreamResult(file)
transformer.transform(source, result)
}
} }

View file

@ -0,0 +1,128 @@
package org.thundernetwork.sitemap
import org.thundernetwork.sitemap.models.UrlEntry
import org.w3c.dom.Document
import org.w3c.dom.Element
import java.io.File
import javax.xml.parsers.DocumentBuilderFactory
import javax.xml.transform.OutputKeys
import javax.xml.transform.TransformerFactory
import javax.xml.transform.dom.DOMSource
import javax.xml.transform.stream.StreamResult
/**
 * Collects [UrlEntry] items in memory and serializes them as sitemap XML.
 *
 * How [path] is interpreted depends on the mode chosen by [generateSitemap]:
 * - single-file mode: [path] is the output file itself;
 * - split mode: [path] is a directory that receives `sitemap-<n>.xml` files,
 *   and the sitemap index is written next to it as `<path>.xml`.
 *
 * @param large when `true`, allows splitting into several files once the number
 *   of entries exceeds the per-file limit; when `false` a single file is always written.
 * @param path output file (single-file mode) or output directory (split mode).
 */
class SitemapGenerator(private val large: Boolean, private val path: String) {
    private val urlEntries = mutableListOf<UrlEntry>()

    /**
     * Parses the sitemap at [filePath] and appends every `<url>` element found to this generator.
     *
     * Entries are added only after the whole file has been read successfully, so a
     * malformed file does not leave the generator partially loaded.
     *
     * @throws IllegalArgumentException if a `<url>` element has no `<loc>` child.
     * @throws NumberFormatException if a `<priority>` value is not a valid number.
     */
    fun loadSitemap(filePath: String) {
        val builder = DocumentBuilderFactory.newInstance().newDocumentBuilder()
        val doc: Document = builder.parse(File(filePath))
        val urlNodes = doc.getElementsByTagName("url")

        val parsed = (0 until urlNodes.length).map { i ->
            val urlElement = urlNodes.item(i) as Element
            val loc = requireNotNull(urlElement.firstText("loc")) {
                "<url> element #$i in $filePath has no <loc> child"
            }
            UrlEntry(
                loc,
                urlElement.firstText("lastmod"),
                urlElement.firstText("changefreq"),
                urlElement.firstText("priority")?.toDouble()
            )
        }
        urlEntries.addAll(parsed)
    }

    /** Appends [urlEntry] to the list of entries to be written. */
    fun addUrl(urlEntry: UrlEntry) {
        urlEntries.add(urlEntry)
    }

    /** Removes every entry whose `loc` equals [loc]. */
    fun removeUrl(loc: String) {
        urlEntries.removeAll { it.loc == loc }
    }

    /**
     * Writes the collected entries to disk.
     *
     * Splits into several files plus an index only when the generator was created
     * with `large = true` and holds more than [MAX_URLS_PER_SITEMAP] entries;
     * otherwise everything goes into the single file at `path`.
     */
    fun generateSitemap() {
        if (large && urlEntries.size > MAX_URLS_PER_SITEMAP) {
            generateLargeSitemap()
        } else {
            generateSingleSitemap(path)
        }
    }

    /**
     * Writes one `<urlset>` document containing [entries] to [filePath].
     *
     * [entries] defaults to all collected entries; split mode passes one chunk at a time.
     * Optional fields that are `null` are omitted from the output.
     */
    private fun generateSingleSitemap(filePath: String, entries: List<UrlEntry> = urlEntries) {
        val doc: Document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()

        val urlset: Element = doc.createElement("urlset")
        urlset.setAttribute("xmlns", SITEMAP_NAMESPACE)
        doc.appendChild(urlset)

        for (entry in entries) {
            val url: Element = doc.createElement("url")
            url.appendTextChild("loc", entry.loc)
            entry.lastmod?.let { url.appendTextChild("lastmod", it) }
            entry.changefreq?.let { url.appendTextChild("changefreq", it) }
            entry.priority?.let { url.appendTextChild("priority", it.toString()) }
            urlset.appendChild(url)
        }

        writeDocument(doc, File(filePath))
    }

    /**
     * Writes the entries as several sitemap files of at most [MAX_URLS_PER_SITEMAP]
     * entries each under the `path` directory, plus a `<sitemapindex>` at `<path>.xml`.
     */
    private fun generateLargeSitemap() {
        val dir = File(path)
        if (!dir.exists()) {
            dir.mkdirs()
        }

        val indexDoc: Document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()
        val sitemapIndex: Element = indexDoc.createElement("sitemapindex")
        sitemapIndex.setAttribute("xmlns", SITEMAP_NAMESPACE)
        indexDoc.appendChild(sitemapIndex)

        urlEntries.chunked(MAX_URLS_PER_SITEMAP).forEachIndexed { index, chunk ->
            val sitemapFilePath = "$path/sitemap-$index.xml"
            // Write only this chunk; writing the full list here would duplicate every URL in every file.
            generateSingleSitemap(sitemapFilePath, chunk)

            // NOTE(review): the index <loc> is the local path string, not a public URL;
            // confirm that consumers of the index can resolve it.
            val sitemap: Element = indexDoc.createElement("sitemap")
            sitemap.appendTextChild("loc", sitemapFilePath)
            sitemapIndex.appendChild(sitemap)
        }

        writeDocument(indexDoc, File("$path.xml"))
    }

    /** Serializes [doc] to [file] as indented XML. */
    private fun writeDocument(doc: Document, file: File) {
        val transformer = TransformerFactory.newInstance().newTransformer()
        transformer.setOutputProperty(OutputKeys.INDENT, "yes")
        transformer.transform(DOMSource(doc), StreamResult(file))
    }

    /** Text of the first descendant element named [tag], or `null` when there is none. */
    private fun Element.firstText(tag: String): String? =
        getElementsByTagName(tag).item(0)?.textContent

    /** Appends a child element named [tag] holding [text] as its only text node. */
    private fun Element.appendTextChild(tag: String, text: String) {
        val child = ownerDocument.createElement(tag)
        child.appendChild(ownerDocument.createTextNode(text))
        appendChild(child)
    }

    private companion object {
        /** Entry count above which split mode kicks in, and the size of each split file. */
        const val MAX_URLS_PER_SITEMAP = 10_000

        /** XML namespace set on the `<urlset>` and `<sitemapindex>` root elements. */
        const val SITEMAP_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9"
    }
}

View file

@ -0,0 +1,8 @@
package org.thundernetwork.sitemap.models
/**
 * One URL record of a sitemap.
 *
 * Only [loc] is mandatory; every other field defaults to `null`.
 * The class itself performs no validation of any value.
 *
 * @property loc address of the page.
 * @property lastmod last-modification value, kept as an unparsed string.
 * @property changefreq change-frequency value, kept as an unparsed string.
 * @property priority numeric priority of the page.
 */
data class UrlEntry(
    val loc: String,
    val lastmod: String? = null,
    val changefreq: String? = null,
    val priority: Double? = null,
)